[dpdk-dev] [PATCH v4 0/5] vhost: generalize buffer vectors

2018-07-06 Thread Maxime Coquelin
This series is again preliminary work to ease packed ring
layout integration.

The main changes are using buffer vectors also in the dequeue
path, and performing the IOVA to HVA translation at vector
fill time.
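
As a rough sketch of the idea (simplified; based on the buf_vector
fields and the vhost_iova_to_vva() helper visible in the patches
below, not the exact code), each descriptor chunk is translated once
at fill time, so the copy loops only ever see host virtual addresses:

    while (desc_len) {
            uint64_t chunk_len = desc_len;
            uint64_t vva;

            /* IOVA to host VA; a chunk may be only partially
             * mapped, in which case chunk_len is shrunk. */
            vva = vhost_iova_to_vva(dev, vq, desc_iova,
                                    &chunk_len, perm);
            if (!vva)
                    return -1;

            buf_vec[vec_id].buf_iova = desc_iova;
            buf_vec[vec_id].buf_addr = vva;
            buf_vec[vec_id].buf_len  = chunk_len;
            vec_id++;

            desc_iova += chunk_len;
            desc_len  -= chunk_len;
    }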

I still have to run more benchmarks, but PVP benchmarks do
not show performance changes.

A nice side effect is that it removes ~140 further lines.

Changes since v3:
=================
- Fix dequeue_zero_copy last_used_idx update (Tiwei)
- Remove "vhost: make gpa to hpa failure an error" patch (Tiwei)

Changes since v2:
=================
 - check vec_id doesn't overflow (Tiwei)
 - Fix perm parameters passed to fill_vec_buf (Tiwei)
 - Remove extra space in variable assignation (Tiwei)


Maxime Coquelin (5):
  vhost: use shadow used ring in dequeue path
  vhost: use buffer vectors in dequeue path
  vhost: improve prefetching in dequeue path
  vhost: prefetch first descriptor in dequeue path
  vhost: improve prefetching in enqueue path

 lib/librte_vhost/vhost.h  |   1 +
 lib/librte_vhost/virtio_net.c | 517 --
 2 files changed, 193 insertions(+), 325 deletions(-)

-- 
2.14.4



[dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path

2018-07-06 Thread Maxime Coquelin
Relax used ring contention by reusing the shadow used
ring feature already used by the enqueue path.
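
The pattern, sketched below with the helpers this patch switches to
(simplified, not the exact hunk), is to stage used-ring updates in a
private shadow array and write them back in one burst rather than
touching the shared used ring once per descriptor:

    /* Stage updates locally; no shared-memory writes yet. */
    for (i = 0; i < count; i++)
            update_shadow_used_ring(vq, desc_indexes[i], 0);

    do_data_copy_dequeue(vq);

    /* One burst of writes to vq->used, one guest notification. */
    flush_shadow_used_ring(dev, vq);
    vhost_vring_call(dev, vq);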

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 50 +--
 1 file changed, 10 insertions(+), 40 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 98ad8e936..741267345 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1019,35 +1019,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
return error;
 }
 
-static __rte_always_inline void
-update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
-uint32_t used_idx, uint32_t desc_idx)
-{
-   vq->used->ring[used_idx].id  = desc_idx;
-   vq->used->ring[used_idx].len = 0;
-   vhost_log_cache_used_vring(dev, vq,
-   offsetof(struct vring_used, ring[used_idx]),
-   sizeof(vq->used->ring[used_idx]));
-}
-
-static __rte_always_inline void
-update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
-   uint32_t count)
-{
-   if (unlikely(count == 0))
-   return;
-
-   rte_smp_wmb();
-   rte_smp_rmb();
-
-   vhost_log_cache_sync(dev, vq);
-
-   vq->used->idx += count;
-   vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
-   sizeof(vq->used->idx));
-   vhost_vring_call(dev, vq);
-}
-
 static __rte_always_inline struct zcopy_mbuf *
 get_zmbuf(struct vhost_virtqueue *vq)
 {
@@ -1115,7 +1086,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mbuf *rarp_mbuf = NULL;
struct vhost_virtqueue *vq;
uint32_t desc_indexes[MAX_PKT_BURST];
-   uint32_t used_idx;
uint32_t i = 0;
uint16_t free_entries;
uint16_t avail_idx;
@@ -1146,6 +1116,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
goto out_access_unlock;
 
vq->batch_copy_nb_elems = 0;
+   vq->shadow_used_idx = 0;
 
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_lock(vq);
@@ -1163,9 +1134,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
next = TAILQ_NEXT(zmbuf, next);
 
if (mbuf_is_consumed(zmbuf->mbuf)) {
-   used_idx = vq->last_used_idx++ & (vq->size - 1);
-   update_used_ring(dev, vq, used_idx,
-zmbuf->desc_idx);
+   update_shadow_used_ring(vq, zmbuf->desc_idx, 0);
nr_updated += 1;
 
TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
@@ -1176,7 +1145,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
}
}
 
-   update_used_idx(dev, vq, nr_updated);
+   flush_shadow_used_ring(dev, vq);
+   vhost_vring_call(dev, vq);
+   vq->shadow_used_idx = 0;
}
 
/*
@@ -1217,9 +1188,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
/* Prefetch available and used ring */
avail_idx = vq->last_avail_idx & (vq->size - 1);
-   used_idx  = vq->last_used_idx  & (vq->size - 1);
rte_prefetch0(&vq->avail->ring[avail_idx]);
-   rte_prefetch0(&vq->used->ring[used_idx]);
 
count = RTE_MIN(count, MAX_PKT_BURST);
count = RTE_MIN(count, free_entries);
@@ -1229,11 +1198,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < count; i++) {
avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
-   used_idx  = (vq->last_used_idx  + i) & (vq->size - 1);
desc_indexes[i] = vq->avail->ring[avail_idx];
 
if (likely(dev->dequeue_zero_copy == 0))
-   update_used_ring(dev, vq, used_idx, desc_indexes[i]);
+   update_shadow_used_ring(vq, desc_indexes[i], 0);
}
 
/* Prefetch descriptor index. */
@@ -1326,8 +1294,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
if (likely(dev->dequeue_zero_copy == 0)) {
do_data_copy_dequeue(vq);
-   vq->last_used_idx += i;
-   update_used_idx(dev, vq, i);
+   if (unlikely(i < count))
+   vq->shadow_used_idx = i;
+   flush_shadow_used_ring(dev, vq);
+   vhost_vring_call(dev, vq);
}
 
 out:
-- 
2.14.4



[dpdk-dev] [PATCH v4 2/5] vhost: use buffer vectors in dequeue path

2018-07-06 Thread Maxime Coquelin
To ease packed ring layout integration, this patch makes
the dequeue path re-use the buffer vectors implemented for
the enqueue path.

Doing this, copy_desc_to_mbuf() is now ring layout type
agnostic.
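
Once the descriptor chain has been flattened into buf_vec[], the copy
routine can walk host-VA chunks without knowing which ring layout
produced them. A minimal sketch, where copy_to_mbuf() is a
hypothetical helper standing in for the real batched-copy logic:

    uint16_t vec_idx;

    for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
            /* buf_addr was translated at fill time: already a host VA. */
            void *src = (void *)(uintptr_t)buf_vec[vec_idx].buf_addr;
            uint32_t len = buf_vec[vec_idx].buf_len;

            copy_to_mbuf(m, src, len); /* hypothetical helper */
    }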

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h  |   1 +
 lib/librte_vhost/virtio_net.c | 451 --
 2 files changed, 167 insertions(+), 285 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3437b996b..79e3117d2 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -43,6 +43,7 @@
  * from vring to do scatter RX.
  */
 struct buf_vector {
+   uint64_t buf_iova;
uint64_t buf_addr;
uint32_t buf_len;
uint32_t desc_idx;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 741267345..6339296c7 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -225,12 +225,12 @@ static __rte_always_inline int
 fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 uint32_t avail_idx, uint32_t *vec_idx,
 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
-uint16_t *desc_chain_len)
+uint16_t *desc_chain_len, uint8_t perm)
 {
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
uint32_t vec_id = *vec_idx;
uint32_t len= 0;
-   uint64_t dlen;
+   uint64_t dlen, desc_avail, desc_iova;
struct vring_desc *descs = vq->desc;
struct vring_desc *idesc = NULL;
 
@@ -261,16 +261,43 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
 
while (1) {
-   if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+   if (unlikely(idx >= vq->size)) {
free_ind_table(idesc);
return -1;
}
 
+
len += descs[idx].len;
-   buf_vec[vec_id].buf_addr = descs[idx].addr;
-   buf_vec[vec_id].buf_len  = descs[idx].len;
-   buf_vec[vec_id].desc_idx = idx;
-   vec_id++;
+   desc_avail = descs[idx].len;
+   desc_iova = descs[idx].addr;
+
+   while (desc_avail) {
+   uint64_t desc_addr;
+   uint64_t desc_chunck_len = desc_avail;
+
+   if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+   free_ind_table(idesc);
+   return -1;
+   }
+
+   desc_addr = vhost_iova_to_vva(dev, vq,
+   desc_iova,
+   &desc_chunck_len,
+   perm);
+   if (unlikely(!desc_addr)) {
+   free_ind_table(idesc);
+   return -1;
+   }
+
+   buf_vec[vec_id].buf_iova = desc_iova;
+   buf_vec[vec_id].buf_addr = desc_addr;
+   buf_vec[vec_id].buf_len  = desc_chunck_len;
+   buf_vec[vec_id].desc_idx = idx;
+
+   desc_avail -= desc_chunck_len;
+   desc_iova += desc_chunck_len;
+   vec_id++;
+   }
 
if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
break;
@@ -293,7 +320,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 static inline int
 reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t size, struct buf_vector *buf_vec,
-   uint16_t *num_buffers, uint16_t avail_head)
+   uint16_t *num_buffers, uint16_t avail_head,
+   uint16_t *nr_vec)
 {
uint16_t cur_idx;
uint32_t vec_idx = 0;
@@ -315,7 +343,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
return -1;
 
if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
-   &head_idx, &len) < 0))
+   &head_idx, &len,
+   VHOST_ACCESS_RW) < 0))
return -1;
len = RTE_MIN(len, size);
update_shadow_used_ring(vq, head_idx, len);
@@ -334,21 +363,22 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
return -1;
}
 
+   *nr_vec = vec_idx;
+
return 0;
 }
 
 static __rte_always_inline int
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
-   uint16_t num_b

[dpdk-dev] [PATCH v4 3/5] vhost: improve prefetching in dequeue path

2018-07-06 Thread Maxime Coquelin
This is an optimization to prefetch the next buffer while
the current one is being processed.
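
In other words (a simplified sketch of the loop shape, not the exact
hunk below), the prefetch of chunk n + 1 overlaps with the copy of
chunk n:

    for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) {
            if (vec_idx + 1 < nr_vec)
                    rte_prefetch0((void *)(uintptr_t)
                            buf_vec[vec_idx + 1].buf_addr);

            process_chunk(&buf_vec[vec_idx]); /* hypothetical per-chunk work */
    }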

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 6339296c7..2cfd8585c 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -902,7 +902,13 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
buf_iova = buf_vec[vec_idx].buf_iova;
buf_len = buf_vec[vec_idx].buf_len;
 
-   rte_prefetch0((void *)(uintptr_t)buf_addr);
+   /*
+* Prefetch desc n + 1 buffer while
+* desc n buffer is processed.
+*/
+   if (vec_idx + 1 < nr_vec)
+   rte_prefetch0((void *)(uintptr_t)
+   buf_vec[vec_idx + 1].buf_addr);
 
buf_offset = 0;
buf_avail  = buf_len;
@@ -1134,6 +1140,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (likely(dev->dequeue_zero_copy == 0))
update_shadow_used_ring(vq, head_idx, 0);
 
+   rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
-- 
2.14.4



[dpdk-dev] [PATCH v4 4/5] vhost: prefetch first descriptor in dequeue path

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2cfd8585c..2662a1d32 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1083,6 +1083,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vq->shadow_used_idx = 0;
}
 
+   rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+
/*
 * Construct a RARP broadcast packet, and inject it into the "pkts"
 * array, to make it look like the guest actually sent such a packet.
-- 
2.14.4



[dpdk-dev] [PATCH v4 5/5] vhost: improve prefetching in enqueue path

2018-07-06 Thread Maxime Coquelin
This is an optimization to prefetch the next buffer while
the current one is being processed.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2662a1d32..82d5d9e17 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -393,6 +393,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
buf_iova = buf_vec[vec_idx].buf_iova;
buf_len = buf_vec[vec_idx].buf_len;
 
+   if (nr_vec > 1)
+   rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
@@ -404,7 +407,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
hdr = &tmp_hdr;
else
hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
-   rte_prefetch0((void *)(uintptr_t)hdr_addr);
 
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
@@ -436,8 +438,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
buf_iova = buf_vec[vec_idx].buf_iova;
buf_len = buf_vec[vec_idx].buf_len;
 
-   /* Prefetch buffer address. */
-   rte_prefetch0((void *)(uintptr_t)buf_addr);
+   /* Prefetch next buffer address. */
+   if (vec_idx + 1 < nr_vec)
+   rte_prefetch0((void *)(uintptr_t)
+   buf_vec[vec_idx + 1].buf_addr);
buf_offset = 0;
buf_avail  = buf_len;
}
@@ -579,6 +583,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
break;
}
 
+   rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
	VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
-- 
2.14.4



[dpdk-dev] [PATCH v9 03/15] vhost: vring address setup for packed queues

2018-07-06 Thread Maxime Coquelin
From: Yuanhan Liu 

Add code to set up packed queues when enabled.

Signed-off-by: Yuanhan Liu 
Signed-off-by: Jens Freimann 
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  | 43 ++-
 lib/librte_vhost/vhost.h  |  7 ++-
 lib/librte_vhost/vhost_user.c | 39 +++
 3 files changed, 79 insertions(+), 10 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 13ce864db..0bf2cc14a 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -115,14 +115,11 @@ free_device(struct virtio_net *dev)
rte_free(dev);
 }
 
-int
-vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+static int
+vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
uint64_t req_size, size;
 
-   if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
-   goto out;
-
req_size = sizeof(struct vring_desc) * vq->size;
size = req_size;
vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
@@ -153,6 +150,40 @@ vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
if (!vq->used || size != req_size)
return -1;
 
+   return 0;
+}
+
+static int
+vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+   uint64_t req_size, size;
+
+   req_size = sizeof(struct vring_packed_desc) * vq->size;
+   size = req_size;
+   vq->desc_packed =
+   (struct vring_packed_desc *)(uintptr_t)vhost_iova_to_vva(dev,
+   vq, vq->ring_addrs.desc_user_addr,
+   &size, VHOST_ACCESS_RW);
+   if (!vq->desc_packed || size != req_size)
+   return -1;
+
+   return 0;
+}
+
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+
+   if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+   goto out;
+
+   if (vq_is_packed(dev)) {
+   if (vring_translate_packed(dev, vq) < 0)
+   return -1;
+   } else {
+   if (vring_translate_split(dev, vq) < 0)
+   return -1;
+   }
 out:
vq->access_ok = 1;
 
@@ -234,6 +265,8 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
dev->virtqueue[vring_idx] = vq;
init_vring_queue(dev, vring_idx);
rte_spinlock_init(&vq->access_lock);
+   vq->avail_wrap_counter = 1;
+   vq->used_wrap_counter = 1;
 
dev->nr_vring += 1;
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d362823a9..b486682c5 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -84,7 +84,10 @@ struct log_cache_entry {
  * Structure contains variables relevant to RX/TX virtqueues.
  */
 struct vhost_virtqueue {
-   struct vring_desc   *desc;
+   union {
+   struct vring_desc   *desc;
+   struct vring_packed_desc   *desc_packed;
+   };
struct vring_avail  *avail;
struct vring_used   *used;
uint32_tsize;
@@ -122,6 +125,8 @@ struct vhost_virtqueue {
 
struct batch_copy_elem  *batch_copy_elems;
uint16_tbatch_copy_nb_elems;
+   boolused_wrap_counter;
+   boolavail_wrap_counter;
 
struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
uint16_t log_cache_nb_elem;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bea6a0428..dca43ff00 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -467,6 +467,27 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
struct vhost_vring_addr *addr = &vq->ring_addrs;
uint64_t len;
 
+   if (vq_is_packed(dev)) {
+   len = sizeof(struct vring_packed_desc) * vq->size;
+   vq->desc_packed = (struct vring_packed_desc *) ring_addr_to_vva
+   (dev, vq, addr->desc_user_addr, &len);
+   vq->log_guest_addr = 0;
+   if (vq->desc_packed == NULL ||
+   len != sizeof(struct vring_packed_desc) *
+   vq->size) {
+   RTE_LOG(DEBUG, VHOST_CONFIG,
+   "(%d) failed to map desc_packed ring.\n",
+   dev->vid);
+   return dev;
+   }
+
+   dev = numa_realloc(dev, vq_index);
+   vq = dev->virtqueue[vq_index];
+   addr = &vq->ring_addrs;
+
+   return dev;
+   }
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
@@ -875,10 +896,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct 

[dpdk-dev] [PATCH v9 01/15] vhost: add virtio packed virtqueue defines

2018-07-06 Thread Maxime Coquelin
From: Jens Freimann 

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 79e3117d2..83c028a1b 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -192,6 +192,26 @@ struct vhost_msg {
  #define VIRTIO_F_VERSION_1 32
 #endif
 
+/* Declare packed ring related bits for older kernels */
+#ifndef VIRTIO_F_RING_PACKED
+
+#define VIRTIO_F_RING_PACKED 34
+
+#define VRING_DESC_F_NEXT  1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT  4
+
+#define VRING_DESC_F_AVAIL (1ULL << 7)
+#define VRING_DESC_F_USED  (1ULL << 15)
+
+struct vring_packed_desc {
+   uint64_t addr;
+   uint32_t len;
+   uint16_t id;
+   uint16_t flags;
+};
+#endif
+
 /*
  * Available and used descs are in same order
  */
-- 
2.14.4



[dpdk-dev] [PATCH v9 00/15] Vhost: add support to packed ring layout

2018-07-06 Thread Maxime Coquelin
This series is a handover from Jens' "[PATCH v4 00/20]
implement packed virtqueues" series, which only implements
the vhost side. The Virtio PMD implementation will follow
in a later series.

The series applies on top of the previous reworks I posted
during this cycle, which merge the mergeable and non-mergeable
receive paths and refactor the transmit path to re-use
vector buffers.

I haven't run performance tests yet, as the Virtio PMD
side isn't ready.

The series has been tested with Tiwei's series implementing
packed ring support in the kernel's virtio-net driver, and
with Wei's series implementing the QEMU side.

To test it, I have used testpmd on the host side with a vhost
vdev and a tap vdev forwarding to each other. Transfers of
big random files have been done in both directions, with
integrity verified.

Tests have been run with Rx mrg ON/OFF and event suppression
ON/OFF.

Tests have also been run with the legacy split ring layout to
ensure no regressions have been introduced.

Changes since v8:
=================
- Fix indents (Tiwei)
- Rename struct vring_desc_packed to vring_packed_desc (Tiwei)

Changes since v7:
=================
- Align structs and defines naming with Kernel header (Tiwei)
- Fix event based notifications (Tiwei)
- Fix Clang build issues caused by unused symbols (Tiwei)

Changes since v6:
=================
- Various style cleanups (Tiwei, Jason)
- Simplify event based notification (Jason)
- Build support with future kernels (Tiwei)
- Prevent buffer vectors overflow in map_one_desc (Tiwei)

Changes since v5:
=================
- Remove duplicated VHOST_USER_F_PROTOCOL_FEATURES definition (Tiwei)
- Fix vq_is_ready (Maxime)
- Fix local used index overflow in flush_shadow_used_ring_packed (Tiwei)
- Make desc_is_avail() a bool (Tiwei)
- Improve desc_is_avail() logic (Tiwei)
- Remove split rings addr NULL assignment in the right patch (Tiwei)
- Make buffer id a uint16_t (Tiwei)

Jens Freimann (2):
  vhost: add virtio packed virtqueue defines
  vhost: add helpers for packed virtqueues

Maxime Coquelin (12):
  vhost: clear shadow used table index at flush time
  vhost: make indirect desc table copy desc type agnostic
  vhost: clear batch copy index at copy time
  vhost: extract split ring handling from Rx and Tx functions
  vhost: append shadow used ring function names with split
  vhost: add shadow used ring support for packed rings
  vhost: create descriptor mapping function
  vhost: add vector filling support for packed ring
  vhost: add Rx support for packed ring
  vhost: add Tx support for packed ring
  vhost: add notification for packed ring
  vhost: advertize packed ring layout support

Yuanhan Liu (1):
  vhost: vring address setup for packed queues

 lib/librte_vhost/vhost.c  | 115 ++-
 lib/librte_vhost/vhost.h  | 130 ++-
 lib/librte_vhost/vhost_user.c | 127 +--
 lib/librte_vhost/virtio_net.c | 776 +-
 4 files changed, 939 insertions(+), 209 deletions(-)

-- 
2.14.4



[dpdk-dev] [PATCH v9 02/15] vhost: add helpers for packed virtqueues

2018-07-06 Thread Maxime Coquelin
From: Jens Freimann 

Add some helper functions to check descriptor flags
and check if a vring is of type packed.
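
For reference, desc_is_avail() works because the driver inverts the
meaning of the AVAIL/USED flag bits on every ring wrap. A worked
example (values illustrative, following the function added below):

    /*
     * wrap_counter  AVAIL  USED  -> available to the device?
     *      1          1     0       yes (made available this lap)
     *      1          1     1       no  (already consumed)
     *      1          0     0       no  (stale entry from previous lap)
     *      0          0     1       yes (flag meanings inverted this lap)
     */
    struct vring_packed_desc desc = { .flags = VRING_DESC_F_AVAIL };
    bool avail = desc_is_avail(&desc, true); /* AVAIL matches, USED differs */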

Signed-off-by: Jens Freimann 
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 83c028a1b..d362823a9 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -344,6 +344,19 @@ struct virtio_net {
struct vhost_user_extern_ops extern_ops;
 } __rte_cache_aligned;
 
+static __rte_always_inline bool
+vq_is_packed(struct virtio_net *dev)
+{
+   return dev->features & (1ull << VIRTIO_F_RING_PACKED);
+}
+
+static inline bool
+desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
+{
+   return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) &&
+   wrap_counter != !!(desc->flags & VRING_DESC_F_USED);
+}
+
 #define VHOST_LOG_PAGE 4096
 
 /*
-- 
2.14.4



[dpdk-dev] [PATCH v9 04/15] vhost: clear shadow used table index at flush time

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 82d5d9e17..d6b30899f 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -115,6 +115,7 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
vhost_log_cache_sync(dev, vq);
 
*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+   vq->shadow_used_idx = 0;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
 }
@@ -567,7 +568,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
 
-   vq->shadow_used_idx = 0;
avail_head = *((volatile uint16_t *)&vq->avail->idx);
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
@@ -1055,7 +1055,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
goto out_access_unlock;
 
vq->batch_copy_nb_elems = 0;
-   vq->shadow_used_idx = 0;
 
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_lock(vq);
@@ -1086,7 +1085,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
flush_shadow_used_ring(dev, vq);
vhost_vring_call(dev, vq);
-   vq->shadow_used_idx = 0;
}
 
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
-- 
2.14.4



[dpdk-dev] [PATCH v9 06/15] vhost: clear batch copy index at copy time

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 3bed77eec..5cc3138d0 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -141,6 +141,8 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
}
+
+   vq->batch_copy_nb_elems = 0;
 }
 
 static inline void
@@ -152,6 +154,8 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
 
for (i = 0; i < count; i++)
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+
+   vq->batch_copy_nb_elems = 0;
 }
 
 /* avoid write operation when necessary, to lessen cache issues */
@@ -564,8 +568,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (count == 0)
goto out;
 
-   vq->batch_copy_nb_elems = 0;
-
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
 
avail_head = *((volatile uint16_t *)&vq->avail->idx);
@@ -1054,8 +1056,6 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
goto out_access_unlock;
 
-   vq->batch_copy_nb_elems = 0;
-
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_lock(vq);
 
-- 
2.14.4



[dpdk-dev] [PATCH v9 05/15] vhost: make indirect desc table copy desc type agnostic

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index d6b30899f..3bed77eec 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -37,16 +37,15 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
 }
 
-static __rte_always_inline struct vring_desc *
+static __rte_always_inline void *
 alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
-struct vring_desc *desc)
+uint64_t desc_addr, uint64_t desc_len)
 {
-   struct vring_desc *idesc;
+   void *idesc;
uint64_t src, dst;
-   uint64_t len, remain = desc->len;
-   uint64_t desc_addr = desc->addr;
+   uint64_t len, remain = desc_len;
 
-   idesc = rte_malloc(__func__, desc->len, 0);
+   idesc = rte_malloc(__func__, desc_len, 0);
if (unlikely(!idesc))
return 0;
 
@@ -72,7 +71,7 @@ alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 static __rte_always_inline void
-free_ind_table(struct vring_desc *idesc)
+free_ind_table(void *idesc)
 {
rte_free(idesc);
 }
@@ -251,7 +250,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 * The indirect desc table is not contiguous
 * in process VA space, we have to copy it.
 */
-   idesc = alloc_copy_ind_table(dev, vq, &vq->desc[idx]);
+   idesc = alloc_copy_ind_table(dev, vq,
+   vq->desc[idx].addr, vq->desc[idx].len);
if (unlikely(!idesc))
return -1;
 
-- 
2.14.4



[dpdk-dev] [PATCH v9 08/15] vhost: append shadow used ring function names with split

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index bdfd6ebef..ae256e062 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -77,8 +77,9 @@ free_ind_table(void *idesc)
 }
 
 static __rte_always_inline void
-do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint16_t to, uint16_t from, uint16_t size)
+do_flush_shadow_used_ring_split(struct virtio_net *dev,
+   struct vhost_virtqueue *vq,
+   uint16_t to, uint16_t from, uint16_t size)
 {
rte_memcpy(&vq->used->ring[to],
&vq->shadow_used_ring[from],
@@ -89,22 +90,22 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 static __rte_always_inline void
-flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
 
if (used_idx + vq->shadow_used_idx <= vq->size) {
-   do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+   do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
  vq->shadow_used_idx);
} else {
uint16_t size;
 
/* update used ring interval [used_idx, vq->size] */
size = vq->size - used_idx;
-   do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+   do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
 
/* update the left half used ring interval [0, left_size] */
-   do_flush_shadow_used_ring(dev, vq, 0, size,
+   do_flush_shadow_used_ring_split(dev, vq, 0, size,
  vq->shadow_used_idx - size);
}
vq->last_used_idx += vq->shadow_used_idx;
@@ -120,7 +121,7 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
 }
 
 static __rte_always_inline void
-update_shadow_used_ring(struct vhost_virtqueue *vq,
+update_shadow_used_ring_split(struct vhost_virtqueue *vq,
 uint16_t desc_idx, uint16_t len)
 {
uint16_t i = vq->shadow_used_idx++;
@@ -353,7 +354,7 @@ reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
VHOST_ACCESS_RW) < 0))
return -1;
len = RTE_MIN(len, size);
-   update_shadow_used_ring(vq, head_idx, len);
+   update_shadow_used_ring_split(vq, head_idx, len);
size -= len;
 
cur_idx++;
@@ -579,7 +580,7 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
do_data_copy_enqueue(dev, vq);
 
if (likely(vq->shadow_used_idx)) {
-   flush_shadow_used_ring(dev, vq);
+   flush_shadow_used_ring_split(dev, vq);
vhost_vring_call(dev, vq);
}
 
@@ -1048,7 +1049,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
next = TAILQ_NEXT(zmbuf, next);
 
if (mbuf_is_consumed(zmbuf->mbuf)) {
-   update_shadow_used_ring(vq, zmbuf->desc_idx, 0);
+   update_shadow_used_ring_split(vq,
+   zmbuf->desc_idx, 0);
nr_updated += 1;
 
TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
@@ -1059,7 +1061,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
}
 
-   flush_shadow_used_ring(dev, vq);
+   flush_shadow_used_ring_split(dev, vq);
vhost_vring_call(dev, vq);
}
 
@@ -1091,7 +1093,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
break;
 
if (likely(dev->dequeue_zero_copy == 0))
-   update_shadow_used_ring(vq, head_idx, 0);
+   update_shadow_used_ring_split(vq, head_idx, 0);
 
rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
 
@@ -1138,7 +1140,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
do_data_copy_dequeue(vq);
if (unlikely(i < count))
vq->shadow_used_idx = i;
-   flush_shadow_used_ring(dev, vq);
+   flush_shadow_used_ring_split(dev, vq);
vhost_vring_call(dev, vq);
}
 
-- 
2.14.4



[dpdk-dev] [PATCH v9 07/15] vhost: extract split ring handling from Rx and Tx functions

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 238 +++---
 1 file changed, 129 insertions(+), 109 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 5cc3138d0..bdfd6ebef 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -226,13 +226,13 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 }
 
 static __rte_always_inline int
-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-uint32_t avail_idx, uint32_t *vec_idx,
+fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+uint32_t avail_idx, uint16_t *vec_idx,
 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
 uint16_t *desc_chain_len, uint8_t perm)
 {
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-   uint32_t vec_id = *vec_idx;
+   uint16_t vec_id = *vec_idx;
uint32_t len= 0;
uint64_t dlen, desc_avail, desc_iova;
struct vring_desc *descs = vq->desc;
@@ -323,13 +323,13 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
  * Returns -1 on fail, 0 on success
  */
 static inline int
-reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t size, struct buf_vector *buf_vec,
uint16_t *num_buffers, uint16_t avail_head,
uint16_t *nr_vec)
 {
uint16_t cur_idx;
-   uint32_t vec_idx = 0;
+   uint16_t vec_idx = 0;
uint16_t max_tries, tries = 0;
 
uint16_t head_idx = 0;
@@ -347,7 +347,8 @@ reserve_avail_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (unlikely(cur_idx == avail_head))
return -1;
 
-   if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
+   if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
+   &vec_idx, buf_vec,
&head_idx, &len,
VHOST_ACCESS_RW) < 0))
return -1;
@@ -534,48 +535,22 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 static __rte_always_inline uint32_t
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf **pkts, uint32_t count)
 {
-   struct vhost_virtqueue *vq;
uint32_t pkt_idx = 0;
uint16_t num_buffers;
struct buf_vector buf_vec[BUF_VECTOR_MAX];
uint16_t avail_head;
 
-   VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   vq = dev->virtqueue[queue_id];
-
-   rte_spinlock_lock(&vq->access_lock);
-
-   if (unlikely(vq->enabled == 0))
-   goto out_access_unlock;
-
-   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-   vhost_user_iotlb_rd_lock(vq);
-
-   if (unlikely(vq->access_ok == 0))
-   if (unlikely(vring_translate(dev, vq) < 0))
-   goto out;
-
-   count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-   if (count == 0)
-   goto out;
-
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
-
avail_head = *((volatile uint16_t *)&vq->avail->idx);
+
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
uint16_t nr_vec = 0;
 
-   if (unlikely(reserve_avail_buf(dev, vq,
+   if (unlikely(reserve_avail_buf_split(dev, vq,
pkt_len, buf_vec, &num_buffers,
avail_head, &nr_vec) < 0)) {
VHOST_LOG_DEBUG(VHOST_DATA,
@@ -608,6 +583,42 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
vhost_vring_call(dev, vq);
}
 
+   return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint32_t count)
+{
+   struct vhost_virtqueue *vq;
+
+   VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+   RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+   dev->vid, __func__, queue

[dpdk-dev] [PATCH v9 09/15] vhost: add shadow used ring support for packed rings

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  |  9 --
 lib/librte_vhost/vhost.h  | 13 ++--
 lib/librte_vhost/vhost_user.c | 64 --
 lib/librte_vhost/virtio_net.c | 71 +--
 4 files changed, 133 insertions(+), 24 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0bf2cc14a..534f30cf6 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -93,9 +93,12 @@ cleanup_device(struct virtio_net *dev, int destroy)
 }
 
 void
-free_vq(struct vhost_virtqueue *vq)
+free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
-   rte_free(vq->shadow_used_ring);
+   if (vq_is_packed(dev))
+   rte_free(vq->shadow_used_packed);
+   else
+   rte_free(vq->shadow_used_split);
rte_free(vq->batch_copy_elems);
rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
@@ -110,7 +113,7 @@ free_device(struct virtio_net *dev)
uint32_t i;
 
for (i = 0; i < dev->nr_vring; i++)
-   free_vq(dev->virtqueue[i]);
+   free_vq(dev, dev->virtqueue[i]);
 
rte_free(dev);
 }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index b486682c5..70f0eebcf 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -80,6 +80,12 @@ struct log_cache_entry {
unsigned long val;
 };
 
+struct vring_used_elem_packed {
+   uint16_t id;
+   uint32_t len;
+   uint32_t count;
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
@@ -119,7 +125,10 @@ struct vhost_virtqueue {
struct zcopy_mbuf   *zmbufs;
struct zcopy_mbuf_list  zmbuf_list;
 
-   struct vring_used_elem  *shadow_used_ring;
+   union {
+   struct vring_used_elem  *shadow_used_split;
+   struct vring_used_elem_packed *shadow_used_packed;
+   };
uint16_tshadow_used_idx;
struct vhost_vring_addr ring_addrs;
 
@@ -587,7 +596,7 @@ void vhost_destroy_device(int);
 void vhost_destroy_device_notify(struct virtio_net *dev);
 
 void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
-void free_vq(struct vhost_virtqueue *vq);
+void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);
 
 int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
 
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index dca43ff00..71d1fe0ac 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -233,7 +233,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
 
dev->virtqueue[dev->nr_vring] = NULL;
cleanup_vq(vq, 1);
-   free_vq(vq);
+   free_vq(dev, vq);
}
}
 
@@ -282,13 +282,26 @@ vhost_user_set_vring_num(struct virtio_net *dev,
TAILQ_INIT(&vq->zmbuf_list);
}
 
-   vq->shadow_used_ring = rte_malloc(NULL,
+   if (vq_is_packed(dev)) {
+   vq->shadow_used_packed = rte_malloc(NULL,
+   vq->size *
+   sizeof(struct vring_used_elem_packed),
+   RTE_CACHE_LINE_SIZE);
+   if (!vq->shadow_used_packed) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "failed to allocate memory for shadow 
used ring.\n");
+   return -1;
+   }
+
+   } else {
+   vq->shadow_used_split = rte_malloc(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE);
-   if (!vq->shadow_used_ring) {
-   RTE_LOG(ERR, VHOST_CONFIG,
-   "failed to allocate memory for shadow used ring.\n");
-   return -1;
+   if (!vq->shadow_used_split) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "failed to allocate memory for shadow 
used ring.\n");
+   return -1;
+   }
}
 
vq->batch_copy_elems = rte_malloc(NULL,
@@ -315,7 +328,8 @@ numa_realloc(struct virtio_net *dev, int index)
struct virtio_net *old_dev;
struct vhost_virtqueue *old_vq, *vq;
struct zcopy_mbuf *new_zmbuf;
-   struct vring_used_elem *new_shadow_used_ring;
+   struct vring_used_elem *new_shadow_used_split;
+   struct vring_used_elem_packed *new_shadow_used_packed;
struct batch_copy_elem *new_batch_copy_elems;
int ret;
 
@@ -350,13 +364,26 @@ numa_realloc(struct virtio_net *dev, int index)
vq->zmbufs = new_zmbuf;
}
 
-   new_shadow_used_ring = rte_malloc_socket(NULL,
-   vq->size * sizeof(struct vring_used_elem),
-   RTE

[dpdk-dev] [PATCH v9 13/15] vhost: add Tx support for packed ring

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h  |   1 +
 lib/librte_vhost/virtio_net.c | 119 +-
 2 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 70f0eebcf..a7e602bec 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -56,6 +56,7 @@ struct buf_vector {
 struct zcopy_mbuf {
struct rte_mbuf *mbuf;
uint32_t desc_idx;
+   uint16_t desc_count;
uint16_t in_use;
 
TAILQ_ENTRY(zcopy_mbuf) next;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ef86e5b40..9f33821f4 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1448,6 +1448,120 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
return i;
 }
 
+static __rte_always_inline uint16_t
+virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+   uint16_t i;
+
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+
+   if (unlikely(dev->dequeue_zero_copy)) {
+   struct zcopy_mbuf *zmbuf, *next;
+
+   for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+zmbuf != NULL; zmbuf = next) {
+   next = TAILQ_NEXT(zmbuf, next);
+
+   if (mbuf_is_consumed(zmbuf->mbuf)) {
+   update_shadow_used_ring_packed(vq,
+   zmbuf->desc_idx,
+   0,
+   zmbuf->desc_count);
+
+   TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+   restore_mbuf(zmbuf->mbuf);
+   rte_pktmbuf_free(zmbuf->mbuf);
+   put_zmbuf(zmbuf);
+   vq->nr_zmbuf -= 1;
+   }
+   }
+
+   flush_shadow_used_ring_packed(dev, vq);
+   vhost_vring_call(dev, vq);
+   }
+
+   VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+
+   count = RTE_MIN(count, MAX_PKT_BURST);
+   VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+   dev->vid, count);
+
+   for (i = 0; i < count; i++) {
+   struct buf_vector buf_vec[BUF_VECTOR_MAX];
+   uint16_t buf_id, dummy_len;
+   uint16_t desc_count, nr_vec = 0;
+   int err;
+
+   if (unlikely(fill_vec_buf_packed(dev, vq,
+   vq->last_avail_idx, &desc_count,
+   buf_vec, &nr_vec,
+   &buf_id, &dummy_len,
+   VHOST_ACCESS_RW) < 0))
+   break;
+
+   if (likely(dev->dequeue_zero_copy == 0))
+   update_shadow_used_ring_packed(vq, buf_id, 0,
+   desc_count);
+
+   rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
+   pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+   if (unlikely(pkts[i] == NULL)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "Failed to allocate memory for mbuf.\n");
+   break;
+   }
+
+   err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+   mbuf_pool);
+   if (unlikely(err)) {
+   rte_pktmbuf_free(pkts[i]);
+   break;
+   }
+
+   if (unlikely(dev->dequeue_zero_copy)) {
+   struct zcopy_mbuf *zmbuf;
+
+   zmbuf = get_zmbuf(vq);
+   if (!zmbuf) {
+   rte_pktmbuf_free(pkts[i]);
+   break;
+   }
+   zmbuf->mbuf = pkts[i];
+   zmbuf->desc_idx = buf_id;
+   zmbuf->desc_count = desc_count;
+
+   /*
+* Pin lock the mbuf; we will check later to see
+* whether the mbuf is freed (when we are the last
+* user) or not. If that's the case, we then could
+* update the used ring safely.
+*/
+   rte_mbuf_refcnt_update(pkts[i], 1);
+
+   vq->nr_zmbuf += 1;
+   TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+   }
+
+   vq->last_avail_idx += desc_count;
+   if (vq->last_avail_idx >= vq->size) {
+   vq->last_avail_idx -= vq->size;
+  

[dpdk-dev] [PATCH v9 10/15] vhost: create descriptor mapping function

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 70 ---
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 44b9daf4a..058786871 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -291,6 +291,40 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
}
 }
 
+static __rte_always_inline int
+map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   struct buf_vector *buf_vec, uint16_t *vec_idx,
+   uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
+{
+   uint16_t vec_id = *vec_idx;
+
+   while (desc_len) {
+   uint64_t desc_addr;
+   uint64_t desc_chunck_len = desc_len;
+
+   if (unlikely(vec_id >= BUF_VECTOR_MAX))
+   return -1;
+
+   desc_addr = vhost_iova_to_vva(dev, vq,
+   desc_iova,
+   &desc_chunck_len,
+   perm);
+   if (unlikely(!desc_addr))
+   return -1;
+
+   buf_vec[vec_id].buf_iova = desc_iova;
+   buf_vec[vec_id].buf_addr = desc_addr;
+   buf_vec[vec_id].buf_len  = desc_chunck_len;
+
+   desc_len -= desc_chunck_len;
+   desc_iova += desc_chunck_len;
+   vec_id++;
+   }
+   *vec_idx = vec_id;
+
+   return 0;
+}
+
 static __rte_always_inline int
 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 uint32_t avail_idx, uint16_t *vec_idx,
@@ -300,7 +334,7 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
uint16_t vec_id = *vec_idx;
uint32_t len= 0;
-   uint64_t dlen, desc_avail, desc_iova;
+   uint64_t dlen;
struct vring_desc *descs = vq->desc;
struct vring_desc *idesc = NULL;
 
@@ -337,37 +371,13 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
return -1;
}
 
-
len += descs[idx].len;
-   desc_avail = descs[idx].len;
-   desc_iova = descs[idx].addr;
-
-   while (desc_avail) {
-   uint64_t desc_addr;
-   uint64_t desc_chunck_len = desc_avail;
-
-   if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
-   free_ind_table(idesc);
-   return -1;
-   }
 
-   desc_addr = vhost_iova_to_vva(dev, vq,
-   desc_iova,
-   &desc_chunck_len,
-   perm);
-   if (unlikely(!desc_addr)) {
-   free_ind_table(idesc);
-   return -1;
-   }
-
-   buf_vec[vec_id].buf_iova = desc_iova;
-   buf_vec[vec_id].buf_addr = desc_addr;
-   buf_vec[vec_id].buf_len  = desc_chunck_len;
-   buf_vec[vec_id].desc_idx = idx;
-
-   desc_avail -= desc_chunck_len;
-   desc_iova += desc_chunck_len;
-   vec_id++;
+   if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+   descs[idx].addr, descs[idx].len,
+   perm))) {
+   free_ind_table(idesc);
+   return -1;
}
 
if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
-- 
2.14.4



[dpdk-dev] [PATCH v9 11/15] vhost: add vector filling support for packed ring

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 111 ++
 1 file changed, 111 insertions(+)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 058786871..9171ee733 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -450,6 +450,117 @@ reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
 }
 
+static __rte_always_inline int
+fill_vec_buf_packed_indirect(struct virtio_net *dev,
+   struct vhost_virtqueue *vq,
+   struct vring_packed_desc *desc, uint16_t *vec_idx,
+   struct buf_vector *buf_vec, uint16_t *len, uint8_t perm)
+{
+   uint16_t i;
+   uint32_t nr_descs;
+   uint16_t vec_id = *vec_idx;
+   uint64_t dlen;
+   struct vring_packed_desc *descs, *idescs = NULL;
+
+   dlen = desc->len;
+   descs = (struct vring_packed_desc *)(uintptr_t)
+   vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
+   if (unlikely(!descs))
+   return -1;
+
+   if (unlikely(dlen < desc->len)) {
+   /*
+* The indirect desc table is not contiguous
+* in process VA space, we have to copy it.
+*/
+   idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len);
+   if (unlikely(!idescs))
+   return -1;
+
+   descs = idescs;
+   }
+
+   nr_descs =  desc->len / sizeof(struct vring_packed_desc);
+   if (unlikely(nr_descs >= vq->size)) {
+   free_ind_table(idescs);
+   return -1;
+   }
+
+   for (i = 0; i < nr_descs; i++) {
+   if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+   free_ind_table(idescs);
+   return -1;
+   }
+
+   *len += descs[i].len;
+   if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+   descs[i].addr, descs[i].len,
+   perm)))
+   return -1;
+   }
+   *vec_idx = vec_id;
+
+   if (unlikely(!!idescs))
+   free_ind_table(idescs);
+
+   return 0;
+}
+
+static __rte_unused __rte_always_inline int
+fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint16_t avail_idx, uint16_t *desc_count,
+   struct buf_vector *buf_vec, uint16_t *vec_idx,
+   uint16_t *buf_id, uint16_t *len, uint8_t perm)
+{
+   bool wrap_counter = vq->avail_wrap_counter;
+   struct vring_packed_desc *descs = vq->desc_packed;
+   uint16_t vec_id = *vec_idx;
+
+   if (avail_idx < vq->last_avail_idx)
+   wrap_counter ^= 1;
+
+   if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
+   return -1;
+
+   *desc_count = 0;
+
+   while (1) {
+   if (unlikely(vec_id >= BUF_VECTOR_MAX))
+   return -1;
+
+   *desc_count += 1;
+   *buf_id = descs[avail_idx].id;
+
+   if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
+   if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
+   &descs[avail_idx],
+   &vec_id, buf_vec,
+   len, perm) < 0))
+   return -1;
+   } else {
+   *len += descs[avail_idx].len;
+
+   if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+   descs[avail_idx].addr,
+   descs[avail_idx].len,
+   perm)))
+   return -1;
+   }
+
+   if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
+   break;
+
+   if (++avail_idx >= vq->size) {
+   avail_idx -= vq->size;
+   wrap_counter ^= 1;
+   }
+   }
+
+   *vec_idx = vec_id;
+
+   return 0;
+}
+
 static __rte_always_inline int
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
-- 
2.14.4



[dpdk-dev] [PATCH v9 12/15] vhost: add Rx support for packed ring

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 123 --
 1 file changed, 119 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 9171ee733..ef86e5b40 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -130,7 +130,7 @@ update_shadow_used_ring_split(struct vhost_virtqueue *vq,
vq->shadow_used_split[i].len = len;
 }
 
-static __rte_unused __rte_always_inline void
+static __rte_always_inline void
 flush_shadow_used_ring_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq)
 {
@@ -184,7 +184,7 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
vhost_log_cache_sync(dev, vq);
 }
 
-static __rte_unused __rte_always_inline void
+static __rte_always_inline void
 update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
 uint16_t desc_idx, uint16_t len, uint16_t count)
 {
@@ -506,7 +506,7 @@ fill_vec_buf_packed_indirect(struct virtio_net *dev,
return 0;
 }
 
-static __rte_unused __rte_always_inline int
+static __rte_always_inline int
 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t avail_idx, uint16_t *desc_count,
struct buf_vector *buf_vec, uint16_t *vec_idx,
@@ -561,6 +561,65 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
 }
 
+/*
+ * Returns -1 on fail, 0 on success
+ */
+static inline int
+reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint32_t size, struct buf_vector *buf_vec,
+   uint16_t *nr_vec, uint16_t *num_buffers,
+   uint16_t *nr_descs)
+{
+   uint16_t avail_idx;
+   uint16_t vec_idx = 0;
+   uint16_t max_tries, tries = 0;
+
+   uint16_t buf_id = 0;
+   uint16_t len = 0;
+   uint16_t desc_count;
+
+   *num_buffers = 0;
+   avail_idx = vq->last_avail_idx;
+
+   if (rxvq_is_mergeable(dev))
+   max_tries = vq->size;
+   else
+   max_tries = 1;
+
+   while (size > 0) {
+   if (unlikely(fill_vec_buf_packed(dev, vq,
+   avail_idx, &desc_count,
+   buf_vec, &vec_idx,
+   &buf_id, &len,
+   VHOST_ACCESS_RO) < 0))
+   return -1;
+
+   len = RTE_MIN(len, size);
+   update_shadow_used_ring_packed(vq, buf_id, len, desc_count);
+   size -= len;
+
+   avail_idx += desc_count;
+   if (avail_idx >= vq->size)
+   avail_idx -= vq->size;
+
+   *nr_descs += desc_count;
+   tries++;
+   *num_buffers += 1;
+
+   /*
+* if we tried all available ring items, and still
+* can't get enough buf, it means something abnormal
+* happened.
+*/
+   if (unlikely(tries > max_tries))
+   return -1;
+   }
+
+   *nr_vec = vec_idx;
+
+   return 0;
+}
+
 static __rte_always_inline int
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
@@ -773,6 +832,59 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
return pkt_idx;
 }
 
+static __rte_always_inline uint32_t
+virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   struct rte_mbuf **pkts, uint32_t count)
+{
+   uint32_t pkt_idx = 0;
+   uint16_t num_buffers;
+   struct buf_vector buf_vec[BUF_VECTOR_MAX];
+
+   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+   uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+   uint16_t nr_vec = 0;
+   uint16_t nr_descs = 0;
+
+   if (unlikely(reserve_avail_buf_packed(dev, vq,
+   pkt_len, buf_vec, &nr_vec,
+   &num_buffers, &nr_descs) < 0)) {
+   VHOST_LOG_DEBUG(VHOST_DATA,
+   "(%d) failed to get enough desc from vring\n",
+   dev->vid);
+   vq->shadow_used_idx -= num_buffers;
+   break;
+   }
+
+   rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
+   VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+   dev->vid, vq->last_avail_idx,
+   vq->last_avail_idx + num_buffers);
+
+   if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+  

[dpdk-dev] [PATCH v9 15/15] vhost: advertize packed ring layout support

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 760a09c0d..9b0ebb754 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -275,7 +275,8 @@ struct vring_packed_desc_event {
(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
(1ULL << VIRTIO_NET_F_MTU)  | \
(1ULL << VIRTIO_F_IN_ORDER) | \
-   (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \
+   (1ULL << VIRTIO_F_RING_PACKED))
 
 
 struct guest_page {
-- 
2.14.4



[dpdk-dev] [PATCH v9 14/15] vhost: add notification for packed ring

2018-07-06 Thread Maxime Coquelin
Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  | 71 +++--
 lib/librte_vhost/vhost.h  | 73 +--
 lib/librte_vhost/vhost_user.c | 24 ++
 lib/librte_vhost/virtio_net.c | 12 +++
 4 files changed, 162 insertions(+), 18 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 534f30cf6..3c9be10a0 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -163,13 +163,28 @@ vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
req_size = sizeof(struct vring_packed_desc) * vq->size;
size = req_size;
-   vq->desc_packed =
-   (struct vring_packed_desc *)(uintptr_t)vhost_iova_to_vva(dev,
-   vq, vq->ring_addrs.desc_user_addr,
-   &size, VHOST_ACCESS_RW);
+   vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+   vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
+   &size, VHOST_ACCESS_RW);
if (!vq->desc_packed || size != req_size)
return -1;
 
+   req_size = sizeof(struct vring_packed_desc_event);
+   size = req_size;
+   vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
+   vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
+   &size, VHOST_ACCESS_RW);
+   if (!vq->driver_event || size != req_size)
+   return -1;
+
+   req_size = sizeof(struct vring_packed_desc_event);
+   size = req_size;
+   vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
+   vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
+   &size, VHOST_ACCESS_RW);
+   if (!vq->device_event || size != req_size)
+   return -1;
+
return 0;
 }
 
@@ -270,6 +285,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
rte_spinlock_init(&vq->access_lock);
vq->avail_wrap_counter = 1;
vq->used_wrap_counter = 1;
+   vq->signalled_used_valid = false;
 
dev->nr_vring += 1;
 
@@ -604,7 +620,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;
 
-   vhost_vring_call(dev, vq);
+   if (vq_is_packed(dev))
+   vhost_vring_call_packed(dev, vq);
+   else
+   vhost_vring_call_split(dev, vq);
+
return 0;
 }
 
@@ -625,19 +645,52 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
 }
 
+static inline void
+vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable)
+{
+   if (enable)
+   vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
+   else
+   vq->used->flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+static inline void
+vhost_enable_notify_packed(struct virtio_net *dev,
+   struct vhost_virtqueue *vq, int enable)
+{
+   uint16_t flags;
+
+   if (!enable)
+   vq->device_event->flags = VRING_EVENT_F_DISABLE;
+
+   flags = VRING_EVENT_F_ENABLE;
+   if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
+   flags = VRING_EVENT_F_DESC;
+   vq->device_event->off_wrap = vq->last_avail_idx |
+   vq->avail_wrap_counter << 15;
+   }
+
+   rte_smp_wmb();
+
+   vq->device_event->flags = flags;
+}
+
 int
 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 {
struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
 
if (!dev)
return -1;
 
-   if (enable)
-   dev->virtqueue[queue_id]->used->flags &=
-   ~VRING_USED_F_NO_NOTIFY;
+   vq = dev->virtqueue[queue_id];
+
+   if (vq_is_packed(dev))
+   vhost_enable_notify_packed(dev, vq, enable);
else
-   dev->virtqueue[queue_id]->used->flags |= VRING_USED_F_NO_NOTIFY;
+   vhost_enable_notify_split(vq, enable);
+
return 0;
 }
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index a7e602bec..760a09c0d 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -95,14 +95,21 @@ struct vhost_virtqueue {
struct vring_desc   *desc;
struct vring_packed_desc   *desc_packed;
};
-   struct vring_avail  *avail;
-   struct vring_used   *used;
+   union {
+   struct vring_avail  *avail;
+   struct vring_packed_desc_event *driver_event;
+   };
+   union {
+   struct vring_used   *used;
+   struct vring_packed_desc_event *device_event;
+   };
uint32_tsize;
 
uint16_tlast_avail_idx;
uint16_t
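
For reference, the driver_event/device_event areas mapped by
vring_translate_packed() above are the packed ring's event suppression
structures: per the virtio 1.1 spec each holds a 16-bit off_wrap field
(descriptor offset in bits 0-14, wrap counter in bit 15) and a 16-bit
flags field. A minimal sketch of that encoding, as used by
vhost_enable_notify_packed() when VIRTIO_RING_F_EVENT_IDX is negotiated
(an illustration, not the DPDK source):

#include <stdint.h>

struct vring_packed_desc_event {
	uint16_t off_wrap; /* bits 0..14: descriptor offset, bit 15: wrap counter */
	uint16_t flags;    /* 0: enable, 1: disable, 2: desc-based (event idx) */
};

static uint16_t make_off_wrap(uint16_t desc_off, int wrap_counter)
{
	return (uint16_t)((desc_off & 0x7fff) | ((wrap_counter & 1) << 15));
}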

Re: [dpdk-dev] [PATCH v4 1/5] vhost: use shadow used ring in dequeue path

2018-07-06 Thread Maxime Coquelin

Hi Tiwei,

On 07/06/2018 09:04 AM, Maxime Coquelin wrote:

Relax used ring contention by reusing the shadow used
ring feature used by enqueue path.

Signed-off-by: Maxime Coquelin

Just noticed I forgot to apply your:
Reviewed-by: Tiwei Bie 

Regards,
Maxime

---
  lib/librte_vhost/virtio_net.c | 50 +--
  1 file changed, 10 insertions(+), 40 deletions(-)


[dpdk-dev] [PATCH v3 01/16] bus/dpaa: fix phandle support for kernel 4.16

2018-07-06 Thread Hemant Agrawal
From: Alok Makhariya 

Fixes: 2183c6f69d7e ("bus/dpaa: add OF parser for device scanning")
Cc: Shreyansh Jain 
Cc: sta...@dpdk.org

Signed-off-by: Alok Makhariya 
Acked-by: Shreyansh Jain 
---
 drivers/bus/dpaa/base/fman/of.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/bus/dpaa/base/fman/of.c b/drivers/bus/dpaa/base/fman/of.c
index 1b2dbe2..eb55cb9 100644
--- a/drivers/bus/dpaa/base/fman/of.c
+++ b/drivers/bus/dpaa/base/fman/of.c
@@ -182,6 +182,11 @@ linear_dir(struct dt_dir *d)
DPAA_BUS_LOG(DEBUG, "Duplicate lphandle in %s",
 d->node.node.full_name);
d->lphandle = f;
+   } else if (!strcmp(f->node.node.name, "phandle")) {
+   if (d->lphandle)
+   DPAA_BUS_LOG(DEBUG, "Duplicate lphandle in %s",
+d->node.node.full_name);
+   d->lphandle = f;
} else if (!strcmp(f->node.node.name, "#address-cells")) {
if (d->a_cells)
DPAA_BUS_LOG(DEBUG, "Duplicate a_cells in %s",
-- 
2.7.4



[dpdk-dev] [PATCH v3 02/16] bus/dpaa: fix svr id fetch location

2018-07-06 Thread Hemant Agrawal
Otherwise, the SVR may not be available for DPAA init.

Fixes: 3b59b73dea08 ("bus/dpaa: update platform SoC value register routines")
Cc: sta...@dpdk.org

Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/bus/dpaa/dpaa_bus.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/bus/dpaa/dpaa_bus.c b/drivers/bus/dpaa/dpaa_bus.c
index 2046206..7956bd0 100644
--- a/drivers/bus/dpaa/dpaa_bus.c
+++ b/drivers/bus/dpaa/dpaa_bus.c
@@ -539,6 +539,13 @@ rte_dpaa_bus_probe(void)
unsigned int svr_ver;
int probe_all = rte_dpaa_bus.bus.conf.scan_mode != 
RTE_BUS_SCAN_WHITELIST;
 
+   svr_file = fopen(DPAA_SOC_ID_FILE, "r");
+   if (svr_file) {
+   if (fscanf(svr_file, "svr:%x", &svr_ver) > 0)
+   dpaa_svr_family = svr_ver & SVR_MASK;
+   fclose(svr_file);
+   }
+
/* For each registered driver, and device, call the driver->probe */
TAILQ_FOREACH(dev, &rte_dpaa_bus.device_list, next) {
TAILQ_FOREACH(drv, &rte_dpaa_bus.driver_list, next) {
@@ -569,13 +576,6 @@ rte_dpaa_bus_probe(void)
if (!TAILQ_EMPTY(&rte_dpaa_bus.device_list))
rte_mbuf_set_platform_mempool_ops(DPAA_MEMPOOL_OPS_NAME);
 
-   svr_file = fopen(DPAA_SOC_ID_FILE, "r");
-   if (svr_file) {
-   if (fscanf(svr_file, "svr:%x", &svr_ver) > 0)
-   dpaa_svr_family = svr_ver & SVR_MASK;
-   fclose(svr_file);
-   }
-
return 0;
 }
 
-- 
2.7.4



[dpdk-dev] [PATCH v3 04/16] net/dpaa: fix the queue err handling and logs

2018-07-06 Thread Hemant Agrawal
Fixes: 5e7455931442 ("net/dpaa: support Rx queue configurations with eventdev")
Cc: sta...@dpdk.org

Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/net/dpaa/dpaa_ethdev.c | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index d014a11..79ba6bd 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -516,7 +516,15 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
 
PMD_INIT_FUNC_TRACE();
 
-   DPAA_PMD_INFO("Rx queue setup for queue index: %d", queue_idx);
+   if (queue_idx >= dev->data->nb_rx_queues) {
+   rte_errno = EOVERFLOW;
+   DPAA_PMD_ERR("%p: queue index out of range (%u >= %u)",
+ (void *)dev, queue_idx, dev->data->nb_rx_queues);
+   return -rte_errno;
+   }
+
+   DPAA_PMD_INFO("Rx queue setup for queue index: %d fq_id (0x%x)",
+   queue_idx, rxq->fqid);
 
if (!dpaa_intf->bp_info || dpaa_intf->bp_info->mp != mp) {
struct fman_if_ic_params icp;
@@ -580,9 +588,11 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
opts.fqd.fq_ctrl |= QM_FQCTRL_CGE;
}
ret = qman_init_fq(rxq, flags, &opts);
-   if (ret)
-   DPAA_PMD_ERR("Channel/Queue association failed. fqid %d"
-" ret: %d", rxq->fqid, ret);
+   if (ret) {
+   DPAA_PMD_ERR("Channel/Q association failed. fqid 0x%x "
+   "ret:%d(%s)", rxq->fqid, ret, strerror(ret));
+   return ret;
+   }
rxq->cb.dqrr_dpdk_pull_cb = dpaa_rx_cb;
rxq->cb.dqrr_prepare = dpaa_rx_cb_prepare;
rxq->is_static = true;
@@ -657,8 +667,8 @@ dpaa_eth_eventq_attach(const struct rte_eth_dev *dev,
 
ret = qman_init_fq(rxq, flags, &opts);
if (ret) {
-   DPAA_PMD_ERR("Channel/Queue association failed. fqid %d ret:%d",
-rxq->fqid, ret);
+   DPAA_PMD_ERR("Ev-Channel/Q association failed. fqid 0x%x "
+   "ret:%d(%s)", rxq->fqid, ret, strerror(ret));
return ret;
}
 
@@ -715,7 +725,15 @@ int dpaa_eth_tx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
 
PMD_INIT_FUNC_TRACE();
 
-   DPAA_PMD_INFO("Tx queue setup for queue index: %d", queue_idx);
+   if (queue_idx >= dev->data->nb_tx_queues) {
+   rte_errno = EOVERFLOW;
+   DPAA_PMD_ERR("%p: queue index out of range (%u >= %u)",
+ (void *)dev, queue_idx, dev->data->nb_tx_queues);
+   return -rte_errno;
+   }
+
+   DPAA_PMD_INFO("Tx queue setup for queue index: %d fq_id (0x%x)",
+   queue_idx, dpaa_intf->tx_queues[queue_idx].fqid);
dev->data->tx_queues[queue_idx] = &dpaa_intf->tx_queues[queue_idx];
return 0;
 }
@@ -1016,7 +1034,7 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct 
qman_cgr *cgr_rx,
DPAA_PMD_DEBUG("creating rx fq %p, fqid %d", fq, fqid);
ret = qman_create_fq(fqid, QMAN_FQ_FLAG_NO_ENQUEUE, fq);
if (ret) {
-   DPAA_PMD_ERR("create rx fqid %d failed with ret: %d",
+   DPAA_PMD_ERR("create rx fqid 0x%x failed with ret: %d",
fqid, ret);
return ret;
}
-- 
2.7.4



[dpdk-dev] [PATCH v3 03/16] bus/dpaa: fix the buffer offset setting in FMAN

2018-07-06 Thread Hemant Agrawal
The buffer offset was incorrectly being set at 64,
thus not honoring the packet headroom.

Fixes: 6d6b4f49a155 ("bus/dpaa: add FMAN hardware operations")
Cc: sta...@dpdk.org

Signed-off-by: Hemant Agrawal 
---
 drivers/bus/dpaa/base/fman/fman_hw.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/bus/dpaa/base/fman/fman_hw.c 
b/drivers/bus/dpaa/base/fman/fman_hw.c
index 0148b98..7ada7fa 100644
--- a/drivers/bus/dpaa/base/fman/fman_hw.c
+++ b/drivers/bus/dpaa/base/fman/fman_hw.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 
+#define FMAN_SP_EXT_BUF_MARG_START_SHIFT	16
+
 /* Instantiate the global variable that the inline CRC64 implementation (in
  * ) depends on.
  */
@@ -422,20 +424,16 @@ fman_if_set_fc_quanta(struct fman_if *fm_if, u16 
pause_quanta)
 int
 fman_if_get_fdoff(struct fman_if *fm_if)
 {
-   u32 fmbm_ricp;
+   u32 fmbm_rebm;
int fdoff;
-   int iceof_mask = 0x001f;
-   int icsz_mask = 0x001f;
 
struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
 
assert(fman_ccsr_map_fd != -1);
 
-   fmbm_ricp =
-  in_be32(&((struct rx_bmi_regs *)__if->bmi_map)->fmbm_ricp);
-   /*iceof + icsz*/
-   fdoff = ((fmbm_ricp & iceof_mask) >> 16) * 16 +
-   (fmbm_ricp & icsz_mask) * 16;
+   fmbm_rebm = in_be32(&((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm);
+
+   fdoff = (fmbm_rebm >> FMAN_SP_EXT_BUF_MARG_START_SHIFT) & 0x1ff;
 
return fdoff;
 }
@@ -502,12 +500,16 @@ fman_if_set_fdoff(struct fman_if *fm_if, uint32_t 
fd_offset)
 {
struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
unsigned int *fmbm_rebm;
+   int val = 0;
+   int fmbm_mask = 0x01ff << FMAN_SP_EXT_BUF_MARG_START_SHIFT;
+
+   val = fd_offset << FMAN_SP_EXT_BUF_MARG_START_SHIFT;
 
assert(fman_ccsr_map_fd != -1);
 
fmbm_rebm = &((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm;
 
-   out_be32(fmbm_rebm, in_be32(fmbm_rebm) | (fd_offset << 16));
+   out_be32(fmbm_rebm, (in_be32(fmbm_rebm) & ~fmbm_mask) | val);
 }
 
 void
-- 
2.7.4
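
To make the bit manipulation above concrete, here is a minimal
host-side model of the 9-bit buffer margin (frame descriptor offset)
field, using the shift from the patch; the 0x1ff width follows the
masks in fman_if_get_fdoff()/fman_if_set_fdoff(). This is an
illustration, not driver code:

#include <assert.h>
#include <stdint.h>

#define MARG_SHIFT 16
#define MARG_MASK  (0x1ffu << MARG_SHIFT)

/* Insert a new offset, clearing the old 9-bit field first,
 * as the fixed fman_if_set_fdoff() must do.
 */
static uint32_t rebm_set_fdoff(uint32_t rebm, uint32_t fd_offset)
{
	return (rebm & ~MARG_MASK) | ((fd_offset & 0x1ff) << MARG_SHIFT);
}

/* Extract it back, as fman_if_get_fdoff() does. */
static uint32_t rebm_get_fdoff(uint32_t rebm)
{
	return (rebm >> MARG_SHIFT) & 0x1ff;
}

int main(void)
{
	uint32_t rebm = rebm_set_fdoff(0, 64);

	assert(rebm_get_fdoff(rebm) == 64);
	rebm = rebm_set_fdoff(rebm, 128); /* the old value must not linger */
	assert(rebm_get_fdoff(rebm) == 128);
	return 0;
}

Note that simply OR-ing the shifted offset into the register, as the
old code did, leaves stale bits behind once a smaller offset is
written, which is exactly what the read-modify-write above avoids.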



[dpdk-dev] [PATCH v3 06/16] bus/dpaa: optimize the fq callback routine

2018-07-06 Thread Hemant Agrawal
Avoid an array of FQs, as packets are dequeued only from a single FQ.

Signed-off-by: Sunil Kumar Kori 
Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/bus/dpaa/base/qbman/qman.c | 15 +++
 drivers/net/dpaa/dpaa_rxtx.c   |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/bus/dpaa/base/qbman/qman.c 
b/drivers/bus/dpaa/base/qbman/qman.c
index 27d98cc..13c4315 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -1058,7 +1058,7 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
struct qm_portal *portal = &p->p;
register struct qm_dqrr *dqrr = &portal->dqrr;
struct qm_dqrr_entry *dq[QM_DQRR_SIZE], *shadow[QM_DQRR_SIZE];
-   struct qman_fq *fq[QM_DQRR_SIZE];
+   struct qman_fq *fq;
unsigned int limit = 0, rx_number = 0;
uint32_t consume = 0;
 
@@ -1092,14 +1092,13 @@ unsigned int qman_portal_poll_rx(unsigned int 
poll_limit,
 
/* SDQCR: context_b points to the FQ */
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
-   fq[rx_number] = qman_fq_lookup_table[be32_to_cpu(
-   dq[rx_number]->contextB)];
+   fq = qman_fq_lookup_table[be32_to_cpu(dq[rx_number]->contextB)];
 #else
-   fq[rx_number] = (void *)be32_to_cpu(
-   dq[rx_number]->contextB);
+   fq = (void *)be32_to_cpu(dq[rx_number]->contextB);
 #endif
-   fq[rx_number]->cb.dqrr_prepare(shadow[rx_number],
-&bufs[rx_number]);
+   if (fq->cb.dqrr_prepare)
+   fq->cb.dqrr_prepare(shadow[rx_number],
+   &bufs[rx_number]);
 
consume |= (1 << (31 - DQRR_PTR2IDX(shadow[rx_number])));
rx_number++;
@@ -1107,7 +1106,7 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
} while (++limit < poll_limit);
 
if (rx_number)
-   fq[0]->cb.dqrr_dpdk_pull_cb(fq, shadow, bufs, rx_number);
+   fq->cb.dqrr_dpdk_pull_cb(&fq, shadow, bufs, rx_number);
 
	/* Consume all the DQRR entries together */
qm_out(DQRR_DCAP, (1 << 8) | consume);
diff --git a/drivers/net/dpaa/dpaa_rxtx.c b/drivers/net/dpaa/dpaa_rxtx.c
index 1316d2a..805bc30 100644
--- a/drivers/net/dpaa/dpaa_rxtx.c
+++ b/drivers/net/dpaa/dpaa_rxtx.c
@@ -431,7 +431,7 @@ dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
}
 
fd = &dqrr[i]->fd;
-   dpaa_intf = fq[i]->dpaa_intf;
+   dpaa_intf = fq[0]->dpaa_intf;
 
format = (fd->opaque & DPAA_FD_FORMAT_MASK) >>
DPAA_FD_FORMAT_SHIFT;
-- 
2.7.4



[dpdk-dev] [PATCH v3 08/16] bus/dpaa: make vdqcr configurable

2018-07-06 Thread Hemant Agrawal
From: Nipun Gupta 

This patch adds support for a configurable VDQCR exact flag.
This boosts performance; however, it can have the side effect
of fetching some extra packets, which is also taken care of
in this patch.

Signed-off-by: Nipun Gupta 
Acked-by: Shreyansh Jain 
---
 drivers/bus/dpaa/base/qbman/qman.c  |  4 ++--
 drivers/bus/dpaa/include/fsl_qman.h |  3 ++-
 drivers/crypto/dpaa_sec/dpaa_sec.c  | 19 ---
 drivers/net/dpaa/dpaa_rxtx.c| 18 +++---
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/drivers/bus/dpaa/base/qbman/qman.c 
b/drivers/bus/dpaa/base/qbman/qman.c
index 13c4315..f5fe5ef 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -2002,13 +2002,13 @@ int qman_query_congestion(struct qm_mcr_querycongestion 
*congestion)
return 0;
 }
 
-int qman_set_vdq(struct qman_fq *fq, u16 num)
+int qman_set_vdq(struct qman_fq *fq, u16 num, uint32_t vdqcr_flags)
 {
struct qman_portal *p = get_affine_portal();
uint32_t vdqcr;
int ret = -EBUSY;
 
-   vdqcr = QM_VDQCR_EXACT;
+   vdqcr = vdqcr_flags;
vdqcr |= QM_VDQCR_NUMFRAMES_SET(num);
 
if ((fq->state != qman_fq_state_parked) &&
diff --git a/drivers/bus/dpaa/include/fsl_qman.h 
b/drivers/bus/dpaa/include/fsl_qman.h
index e4ad7ae..b18cf03 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -1332,10 +1332,11 @@ unsigned int qman_portal_poll_rx(unsigned int 
poll_limit,
  * qman_set_vdq - Issue a volatile dequeue command
  * @fq: Frame Queue on which the volatile dequeue command is issued
  * @num: Number of Frames requested for volatile dequeue
+ * @vdqcr_flags: QM_VDQCR_EXACT flag for the VDQCR command
  *
  * This function will issue a volatile dequeue command to the QMAN.
  */
-int qman_set_vdq(struct qman_fq *fq, u16 num);
+int qman_set_vdq(struct qman_fq *fq, u16 num, uint32_t vdqcr_flags);
 
 /**
  * qman_dequeue - Get the DQRR entry after volatile dequeue command
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.c 
b/drivers/crypto/dpaa_sec/dpaa_sec.c
index 06f7e43..a07869f 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.c
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.c
@@ -526,12 +526,25 @@ dpaa_sec_deq(struct dpaa_sec_qp *qp, struct rte_crypto_op 
**ops, int nb_ops)
 {
struct qman_fq *fq;
unsigned int pkts = 0;
-   int ret;
+   int num_rx_bufs, ret;
struct qm_dqrr_entry *dq;
+   uint32_t vdqcr_flags = 0;
 
fq = &qp->outq;
-   ret = qman_set_vdq(fq, (nb_ops > DPAA_MAX_DEQUEUE_NUM_FRAMES) ?
-   DPAA_MAX_DEQUEUE_NUM_FRAMES : nb_ops);
+   /*
+* For requests of fewer than four buffers, we provide the exact
+* number of buffers. Otherwise we do not set the QM_VDQCR_EXACT
+* flag: not setting it can return up to two more buffers than
+* requested, so we request two fewer in that case.
+*/
+   if (nb_ops < 4) {
+   vdqcr_flags = QM_VDQCR_EXACT;
+   num_rx_bufs = nb_ops;
+   } else {
+   num_rx_bufs = nb_ops > DPAA_MAX_DEQUEUE_NUM_FRAMES ?
+   (DPAA_MAX_DEQUEUE_NUM_FRAMES - 2) : (nb_ops - 2);
+   }
+   ret = qman_set_vdq(fq, num_rx_bufs, vdqcr_flags);
if (ret)
return 0;
 
diff --git a/drivers/net/dpaa/dpaa_rxtx.c b/drivers/net/dpaa/dpaa_rxtx.c
index 805bc30..168b77e 100644
--- a/drivers/net/dpaa/dpaa_rxtx.c
+++ b/drivers/net/dpaa/dpaa_rxtx.c
@@ -560,7 +560,8 @@ uint16_t dpaa_eth_queue_rx(void *q,
struct qman_fq *fq = q;
struct qm_dqrr_entry *dq;
uint32_t num_rx = 0, ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
-   int ret;
+   int num_rx_bufs, ret;
+   uint32_t vdqcr_flags = 0;
 
if (likely(fq->is_static))
return dpaa_eth_queue_portal_rx(fq, bufs, nb_bufs);
@@ -573,8 +574,19 @@ uint16_t dpaa_eth_queue_rx(void *q,
}
}
 
-   ret = qman_set_vdq(fq, (nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES) ?
-   DPAA_MAX_DEQUEUE_NUM_FRAMES : nb_bufs);
+   /* For requests of fewer than four buffers, we provide the exact
+* number of buffers. Otherwise we do not set the QM_VDQCR_EXACT
+* flag: not setting it can return up to two more buffers than
+* requested, so we request two fewer in that case.
+*/
+   if (nb_bufs < 4) {
+   vdqcr_flags = QM_VDQCR_EXACT;
+   num_rx_bufs = nb_bufs;
+   } else {
+   num_rx_bufs = nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES ?
+   (DPAA_MAX_DEQUEUE_NUM_FRAMES - 2) : (nb_bufs - 2);
+   }
+   ret = qman_set_vdq(fq, num_rx_bufs, vdqcr_flags);
if (ret)
return 0;
 
-- 
2.7.4
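
The request-size rule described above reduces to a small helper; a
standalone sketch (the DPAA_MAX_DEQUEUE_NUM_FRAMES value and the
QM_VDQCR_EXACT encoding are assumptions for illustration, the real
definitions live in the dpaa headers):

#include <stdint.h>

#define DPAA_MAX_DEQUEUE_NUM_FRAMES 63      /* assumed driver limit */
#define QM_VDQCR_EXACT (1u << 31)           /* placeholder bit value */

/* Mirrors the logic added to dpaa_sec_deq() and dpaa_eth_queue_rx():
 * small requests use the exact flag; larger ones request two fewer
 * frames because an inexact volatile dequeue may return two extra.
 */
static uint16_t vdq_request(uint16_t nb, uint32_t *vdqcr_flags)
{
	if (nb < 4) {
		*vdqcr_flags = QM_VDQCR_EXACT;
		return nb;
	}
	*vdqcr_flags = 0;
	return (nb > DPAA_MAX_DEQUEUE_NUM_FRAMES) ?
		DPAA_MAX_DEQUEUE_NUM_FRAMES - 2 : nb - 2;
}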



[dpdk-dev] [PATCH v3 09/16] net/dpaa: support default queue mode

2018-07-06 Thread Hemant Agrawal
In case the DPAA FMAN configuration tool (FMC) is not available,
the system can still work with a default queue (1 queue per port).

This patch also fixes some logs related to FQ ids, which were
identified while testing this support.

Signed-off-by: Hemant Agrawal 
---
 drivers/net/dpaa/dpaa_ethdev.c | 37 ++---
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index 79ba6bd..def9483 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -74,6 +74,7 @@ static uint64_t dev_tx_offloads_nodis =
 
 /* Keep track of whether QMAN and BMAN have been globally initialized */
 static int is_global_init;
+static int default_q;  /* use default queue - FMC is not executed */
 /* At present we only allow up to 4 push mode queues as default - as each of
  * this queue need dedicated portal and we are short of portals.
  */
@@ -1026,12 +1027,12 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, 
struct qman_cgr *cgr_rx,
 
ret = qman_reserve_fqid(fqid);
if (ret) {
-   DPAA_PMD_ERR("reserve rx fqid %d failed with ret: %d",
+   DPAA_PMD_ERR("reserve rx fqid 0x%x failed with ret: %d",
 fqid, ret);
return -EINVAL;
}
 
-   DPAA_PMD_DEBUG("creating rx fq %p, fqid %d", fq, fqid);
+   DPAA_PMD_DEBUG("creating rx fq %p, fqid 0x%x", fq, fqid);
ret = qman_create_fq(fqid, QMAN_FQ_FLAG_NO_ENQUEUE, fq);
if (ret) {
DPAA_PMD_ERR("create rx fqid 0x%x failed with ret: %d",
@@ -1050,7 +1051,7 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct 
qman_cgr *cgr_rx,
  &cgr_opts);
if (ret) {
DPAA_PMD_WARN(
-   "rx taildrop init fail on rx fqid %d (ret=%d)",
+   "rx taildrop init fail on rx fqid 0x%x(ret=%d)",
fqid, ret);
goto without_cgr;
}
@@ -1061,7 +1062,7 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct 
qman_cgr *cgr_rx,
 without_cgr:
ret = qman_init_fq(fq, flags, &opts);
if (ret)
-   DPAA_PMD_ERR("init rx fqid %d failed with ret: %d", fqid, ret);
+   DPAA_PMD_ERR("init rx fqid 0x%x failed with ret:%d", fqid, ret);
return ret;
 }
 
@@ -1089,10 +1090,10 @@ static int dpaa_tx_queue_init(struct qman_fq *fq,
/* no tx-confirmation */
opts.fqd.context_a.hi = 0x8000 | fman_dealloc_bufs_mask_hi;
opts.fqd.context_a.lo = 0 | fman_dealloc_bufs_mask_lo;
-   DPAA_PMD_DEBUG("init tx fq %p, fqid %d", fq, fq->fqid);
+   DPAA_PMD_DEBUG("init tx fq %p, fqid 0x%x", fq, fq->fqid);
ret = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);
if (ret)
-   DPAA_PMD_ERR("init tx fqid %d failed %d", fq->fqid, ret);
+   DPAA_PMD_ERR("init tx fqid 0x%x failed %d", fq->fqid, ret);
return ret;
 }
 
@@ -1163,10 +1164,15 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
dpaa_intf->cfg = cfg;
 
/* Initialize Rx FQ's */
-   if (getenv("DPAA_NUM_RX_QUEUES"))
-   num_rx_fqs = atoi(getenv("DPAA_NUM_RX_QUEUES"));
-   else
+   if (default_q) {
num_rx_fqs = DPAA_DEFAULT_NUM_PCD_QUEUES;
+   } else {
+   if (getenv("DPAA_NUM_RX_QUEUES"))
+   num_rx_fqs = atoi(getenv("DPAA_NUM_RX_QUEUES"));
+   else
+   num_rx_fqs = DPAA_DEFAULT_NUM_PCD_QUEUES;
+   }
+
 
/* if push mode queues to be enabled. Currenly we are allowing only
 * one queue per thread.
@@ -1214,8 +1220,11 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
}
 
for (loop = 0; loop < num_rx_fqs; loop++) {
-   fqid = DPAA_PCD_FQID_START + dpaa_intf->ifid *
-   DPAA_PCD_FQID_MULTIPLIER + loop;
+   if (default_q)
+   fqid = cfg->rx_def;
+   else
+   fqid = DPAA_PCD_FQID_START + dpaa_intf->ifid *
+   DPAA_PCD_FQID_MULTIPLIER + loop;
 
if (dpaa_intf->cgr_rx)
dpaa_intf->cgr_rx[loop].cgrid = cgrid[loop];
@@ -1409,6 +1418,12 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv,
return ret;
}
 
+   if (access("/tmp/fmc.bin", F_OK) == -1) {
+   RTE_LOG(INFO, PMD,
+   "* FMC not configured.Enabling default mode\n");
+   default_q = 1;
+   }
+
is_global_init = 1;
}
 
-- 
2.7.4



[dpdk-dev] [PATCH v3 05/16] net/dpaa2: fix the prefetch Rx to honor nb pkts

2018-07-06 Thread Hemant Agrawal
This patch fixes the prefetch Rx routine to
set the next prefetch request to the size of nb_pkts.
It assumes that the next request would ideally be
of the same size.

Fixes: 4bc5ab88dbd6 ("net/dpaa2: fix Tx only mode")
Cc: sta...@dpdk.org

Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/net/dpaa2/dpaa2_rxtx.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dpaa2/dpaa2_rxtx.c b/drivers/net/dpaa2/dpaa2_rxtx.c
index dac086d..ef109a6 100644
--- a/drivers/net/dpaa2/dpaa2_rxtx.c
+++ b/drivers/net/dpaa2/dpaa2_rxtx.c
@@ -447,6 +447,12 @@ eth_copy_mbuf_to_fd(struct rte_mbuf *mbuf,
 return 0;
 }
 
+/* This function assumes that the caller keeps the same value of nb_pkts
+ * across calls for a given queue; if that is not the case, better use the
+ * non-prefetch version of the Rx call.
+ * It will return the number of packets requested in the previous call,
+ * without honoring the current nb_pkts or bufs space.
+ */
+ */
 uint16_t
 dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -454,7 +460,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, 
uint16_t nb_pkts)
struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)queue;
struct qbman_result *dq_storage, *dq_storage1 = NULL;
uint32_t fqid = dpaa2_q->fqid;
-   int ret, num_rx = 0;
+   int ret, num_rx = 0, pull_size;
uint8_t pending, status;
struct qbman_swp *swp;
const struct qbman_fd *fd, *next_fd;
@@ -470,12 +476,12 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf 
**bufs, uint16_t nb_pkts)
}
}
swp = DPAA2_PER_LCORE_ETHRX_PORTAL;
-
+   pull_size = (nb_pkts > DPAA2_DQRR_RING_SIZE) ?
+  DPAA2_DQRR_RING_SIZE : nb_pkts;
if (unlikely(!q_storage->active_dqs)) {
q_storage->toggle = 0;
dq_storage = q_storage->dq_storage[q_storage->toggle];
-   q_storage->last_num_pkts = (nb_pkts > DPAA2_DQRR_RING_SIZE) ?
-  DPAA2_DQRR_RING_SIZE : nb_pkts;
+   q_storage->last_num_pkts = pull_size;
qbman_pull_desc_clear(&pulldesc);
qbman_pull_desc_set_numframes(&pulldesc,
  q_storage->last_num_pkts);
@@ -514,7 +520,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, 
uint16_t nb_pkts)
q_storage->toggle ^= 1;
dq_storage1 = q_storage->dq_storage[q_storage->toggle];
qbman_pull_desc_clear(&pulldesc);
-   qbman_pull_desc_set_numframes(&pulldesc, DPAA2_DQRR_RING_SIZE);
+   qbman_pull_desc_set_numframes(&pulldesc, pull_size);
qbman_pull_desc_set_fq(&pulldesc, fqid);
qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
-- 
2.7.4
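
Given the caller contract spelled out in the new comment, an
application polling a queue served by this prefetch Rx path should
keep its burst size constant per queue. A minimal sketch with the
standard rte_eth_rx_burst() API (port/queue ids and BURST_SIZE are
placeholders):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SIZE 32 /* keep constant per queue for the prefetch path */

static void poll_queue(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *bufs[BURST_SIZE];
	uint16_t i, nb_rx;

	for (;;) {
		/* Always request the same nb_pkts: the prefetch routine
		 * sizes the *next* hardware pull from this value.
		 */
		nb_rx = rte_eth_rx_burst(port_id, queue_id, bufs, BURST_SIZE);
		for (i = 0; i < nb_rx; i++)
			rte_pktmbuf_free(bufs[i]); /* application work here */
	}
}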



[dpdk-dev] [PATCH v3 07/16] bus/dpaa: implement new of API to get MAC address

2018-07-06 Thread Hemant Agrawal
From: Akhil Goyal 

Signed-off-by: Akhil Goyal 
Acked-by: Shreyansh Jain 
---
 drivers/bus/dpaa/base/fman/of.c   | 39 +++
 drivers/bus/dpaa/include/of.h |  2 ++
 drivers/bus/dpaa/rte_bus_dpaa_version.map |  8 +++
 3 files changed, 49 insertions(+)

diff --git a/drivers/bus/dpaa/base/fman/of.c b/drivers/bus/dpaa/base/fman/of.c
index eb55cb9..a7f3174 100644
--- a/drivers/bus/dpaa/base/fman/of.c
+++ b/drivers/bus/dpaa/base/fman/of.c
@@ -546,3 +546,42 @@ of_device_is_compatible(const struct device_node *dev_node,
return true;
return false;
 }
+
+static const void *of_get_mac_addr(const struct device_node *np,
+   const char *name)
+{
+   return of_get_property(np, name, NULL);
+}
+
+/**
+ * Search the device tree for the best MAC address to use.  'mac-address' is
+ * checked first, because that is supposed to contain the "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address.  If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot.  For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses.  Some older U-Boots only initialized 'local-mac-address'.  In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+ */
+const void *of_get_mac_address(const struct device_node *np)
+{
+   const void *addr;
+
+   addr = of_get_mac_addr(np, "mac-address");
+   if (addr)
+   return addr;
+
+   addr = of_get_mac_addr(np, "local-mac-address");
+   if (addr)
+   return addr;
+
+   return of_get_mac_addr(np, "address");
+}
diff --git a/drivers/bus/dpaa/include/of.h b/drivers/bus/dpaa/include/of.h
index 151be5a..7ea7608 100644
--- a/drivers/bus/dpaa/include/of.h
+++ b/drivers/bus/dpaa/include/of.h
@@ -109,6 +109,8 @@ const struct device_node *of_get_parent(const struct 
device_node *dev_node);
 const struct device_node *of_get_next_child(const struct device_node *dev_node,
const struct device_node *prev);
 
+const void *of_get_mac_address(const struct device_node *np);
+
 #define for_each_child_node(parent, child) \
for (child = of_get_next_child(parent, NULL); child != NULL; \
child = of_get_next_child(parent, child))
diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map 
b/drivers/bus/dpaa/rte_bus_dpaa_version.map
index 8d90285..e00c911 100644
--- a/drivers/bus/dpaa/rte_bus_dpaa_version.map
+++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
@@ -92,3 +92,11 @@ DPDK_18.02 {
 
local: *;
 } DPDK_17.11;
+
+DPDK_18.08 {
+   global:
+
+   of_get_mac_address;
+
+   local: *;
+} DPDK_18.02;
-- 
2.7.4
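
A short sketch of how a caller inside the DPAA driver might consume
the new accessor (the device_node pointer and the destination address
are assumed to come from the surrounding driver code; fill_mac()
itself is a hypothetical helper for illustration):

#include <string.h>
#include <rte_ether.h>

static int fill_mac(const struct device_node *np, struct ether_addr *addr)
{
	const void *mac = of_get_mac_address(np);

	if (mac == NULL)
		return -1; /* no usable MAC property in the device tree */
	memcpy(addr, mac, ETHER_ADDR_LEN);
	return 0;
}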



[dpdk-dev] [PATCH v3 11/16] bus/dpaa: cleanup unnecessary global variables

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Pavan Nikhilesh 
Signed-off-by: Hemant Agrawal 
---
 drivers/bus/dpaa/base/fman/netcfg_layer.c | 5 -
 drivers/bus/dpaa/base/qbman/bman_driver.c | 4 ++--
 drivers/bus/dpaa/base/qbman/qman.c| 2 +-
 drivers/bus/dpaa/base/qbman/qman_driver.c | 4 ++--
 drivers/bus/dpaa/base/qbman/qman_priv.h   | 1 -
 drivers/bus/dpaa/dpaa_bus.c   | 2 +-
 6 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/bus/dpaa/base/fman/netcfg_layer.c 
b/drivers/bus/dpaa/base/fman/netcfg_layer.c
index 3e956ce..031c6f1 100644
--- a/drivers/bus/dpaa/base/fman/netcfg_layer.c
+++ b/drivers/bus/dpaa/base/fman/netcfg_layer.c
@@ -18,11 +18,6 @@
 #include 
 #include 
 
-/* Structure contains information about all the interfaces given by user
- * on command line.
- */
-struct netcfg_interface *netcfg_interface;
-
 /* This data structure contaings all configurations information
  * related to usages of DPA devices.
  */
diff --git a/drivers/bus/dpaa/base/qbman/bman_driver.c 
b/drivers/bus/dpaa/base/qbman/bman_driver.c
index 1381da3..b14b590 100644
--- a/drivers/bus/dpaa/base/qbman/bman_driver.c
+++ b/drivers/bus/dpaa/base/qbman/bman_driver.c
@@ -15,9 +15,9 @@
 /*
  * Global variables of the max portal/pool number this bman version supported
  */
-u16 bman_ip_rev;
+static u16 bman_ip_rev;
 u16 bman_pool_max;
-void *bman_ccsr_map;
+static void *bman_ccsr_map;
 
 /*/
 /* Portal driver */
diff --git a/drivers/bus/dpaa/base/qbman/qman.c 
b/drivers/bus/dpaa/base/qbman/qman.c
index f5fe5ef..7c17027 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -625,7 +625,7 @@ struct qman_portal *qman_create_portal(
 
 #define MAX_GLOBAL_PORTALS 8
 static struct qman_portal global_portals[MAX_GLOBAL_PORTALS];
-rte_atomic16_t global_portals_used[MAX_GLOBAL_PORTALS];
+static rte_atomic16_t global_portals_used[MAX_GLOBAL_PORTALS];
 
 static struct qman_portal *
 qman_alloc_global_portal(void)
diff --git a/drivers/bus/dpaa/base/qbman/qman_driver.c 
b/drivers/bus/dpaa/base/qbman/qman_driver.c
index 07b29d5..f6ecd6b 100644
--- a/drivers/bus/dpaa/base/qbman/qman_driver.c
+++ b/drivers/bus/dpaa/base/qbman/qman_driver.c
@@ -20,9 +20,9 @@ u16 qm_channel_caam = QMAN_CHANNEL_CAAM;
 u16 qm_channel_pme = QMAN_CHANNEL_PME;
 
 /* Ccsr map address to access ccsrbased register */
-void *qman_ccsr_map;
+static void *qman_ccsr_map;
 /* The qman clock frequency */
-u32 qman_clk;
+static u32 qman_clk;
 
 static __thread int qmfd = -1;
 static __thread struct qm_portal_config qpcfg;
diff --git a/drivers/bus/dpaa/base/qbman/qman_priv.h 
b/drivers/bus/dpaa/base/qbman/qman_priv.h
index 9e4471e..02f6301 100644
--- a/drivers/bus/dpaa/base/qbman/qman_priv.h
+++ b/drivers/bus/dpaa/base/qbman/qman_priv.h
@@ -139,7 +139,6 @@ struct qm_portal_config {
 #define QMAN_REV31 0x0301
 #define QMAN_REV32 0x0302
 extern u16 qman_ip_rev; /* 0 if uninitialised, otherwise QMAN_REVx */
-extern u32 qman_clk;
 
 int qm_set_wpm(int wpm);
 int qm_get_wpm(int *wpm);
diff --git a/drivers/bus/dpaa/dpaa_bus.c b/drivers/bus/dpaa/dpaa_bus.c
index 7956bd0..5ba3d28 100644
--- a/drivers/bus/dpaa/dpaa_bus.c
+++ b/drivers/bus/dpaa/dpaa_bus.c
@@ -50,7 +50,7 @@ struct rte_dpaa_bus rte_dpaa_bus;
 struct netcfg_info *dpaa_netcfg;
 
 /* define a variable to hold the portal_key, once created.*/
-pthread_key_t dpaa_portal_key;
+static pthread_key_t dpaa_portal_key;
 
 unsigned int dpaa_svr_family;
 
-- 
2.7.4



[dpdk-dev] [PATCH v3 10/16] net/dpaa: remove experimental tag from PMD APIs

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/net/dpaa/dpaa_ethdev.c| 6 +++---
 drivers/net/dpaa/dpaa_ethdev.h| 8 +---
 drivers/net/dpaa/rte_pmd_dpaa.h   | 5 +
 drivers/net/dpaa/rte_pmd_dpaa_version.map | 4 ++--
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index def9483..00611f8 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -617,7 +617,7 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
return 0;
 }
 
-int __rte_experimental
+int
 dpaa_eth_eventq_attach(const struct rte_eth_dev *dev,
int eth_rx_queue_id,
u16 ch_id,
@@ -680,7 +680,7 @@ dpaa_eth_eventq_attach(const struct rte_eth_dev *dev,
return ret;
 }
 
-int __rte_experimental
+int
 dpaa_eth_eventq_detach(const struct rte_eth_dev *dev,
int eth_rx_queue_id)
 {
@@ -956,7 +956,7 @@ is_dpaa_supported(struct rte_eth_dev *dev)
return is_device_supported(dev, &rte_dpaa_pmd);
 }
 
-int __rte_experimental
+int
 rte_pmd_dpaa_set_tx_loopback(uint8_t port, uint8_t on)
 {
struct rte_eth_dev *dev;
diff --git a/drivers/net/dpaa/dpaa_ethdev.h b/drivers/net/dpaa/dpaa_ethdev.h
index 1897b9e..c79b9f8 100644
--- a/drivers/net/dpaa/dpaa_ethdev.h
+++ b/drivers/net/dpaa/dpaa_ethdev.h
@@ -160,12 +160,14 @@ struct dpaa_if_stats {
uint64_t tund;  /**
 
 /**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
  * Enable/Disable TX loopback
  *
  * @param port
@@ -33,7 +30,7 @@
  *   - (-ENODEV) if *port* invalid.
  *   - (-EINVAL) if bad parameter.
  */
-int __rte_experimental
+int
 rte_pmd_dpaa_set_tx_loopback(uint8_t port, uint8_t on);
 
 #endif /* _PMD_DPAA_H_ */
diff --git a/drivers/net/dpaa/rte_pmd_dpaa_version.map 
b/drivers/net/dpaa/rte_pmd_dpaa_version.map
index c7ad403..8cb4500 100644
--- a/drivers/net/dpaa/rte_pmd_dpaa_version.map
+++ b/drivers/net/dpaa/rte_pmd_dpaa_version.map
@@ -3,10 +3,10 @@ DPDK_17.11 {
local: *;
 };
 
-EXPERIMENTAL {
+DPDK_18.08 {
global:
 
dpaa_eth_eventq_attach;
dpaa_eth_eventq_detach;
rte_pmd_dpaa_set_tx_loopback;
-};
+} DPDK_17.11;
-- 
2.7.4



[dpdk-dev] [PATCH v3 12/16] bus/fslmc: cleanup unnecessary global variables

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Pavan Nikhilesh 
Signed-off-by: Hemant Agrawal 
---
 drivers/bus/fslmc/qbman/qbman_portal.c | 3 +--
 drivers/bus/fslmc/qbman/qbman_portal.h | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c 
b/drivers/bus/fslmc/qbman/qbman_portal.c
index 713ec96..0714500 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -122,8 +122,7 @@ struct qbman_swp *qbman_swp_init(const struct 
qbman_swp_desc *d)
p->vdq.valid_bit = QB_VALID_BIT;
p->dqrr.next_idx = 0;
p->dqrr.valid_bit = QB_VALID_BIT;
-   qman_version = p->desc.qman_version;
-   if ((qman_version & 0xFFFF0000) < QMAN_REV_4100) {
+   if ((p->desc.qman_version & 0xFFFF0000) < QMAN_REV_4100) {
p->dqrr.dqrr_size = 4;
p->dqrr.reset_bug = 1;
} else {
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.h 
b/drivers/bus/fslmc/qbman/qbman_portal.h
index 8bff0b4..dbea22a 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/qbman_portal.h
@@ -7,7 +7,6 @@
 #include "qbman_sys.h"
 #include 
 
-uint32_t qman_version;
 #define QMAN_REV_4000   0x04000000
 #define QMAN_REV_4100   0x04010000
 #define QMAN_REV_4101   0x04010001
-- 
2.7.4



[dpdk-dev] [PATCH v3 14/16] net/dpaa: move the push queue set to global init

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/net/dpaa/dpaa_ethdev.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index 00611f8..5c0aafb 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -1174,16 +1174,6 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
}
 
 
-   /* if push mode queues to be enabled. Currenly we are allowing only
-* one queue per thread.
-*/
-   if (getenv("DPAA_PUSH_QUEUES_NUMBER")) {
-   dpaa_push_mode_max_queue =
-   atoi(getenv("DPAA_PUSH_QUEUES_NUMBER"));
-   if (dpaa_push_mode_max_queue > DPAA_MAX_PUSH_MODE_QUEUE)
-   dpaa_push_mode_max_queue = DPAA_MAX_PUSH_MODE_QUEUE;
-   }
-
/* Each device can not have more than DPAA_MAX_NUM_PCD_QUEUES RX
 * queues.
 */
@@ -1424,6 +1414,20 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv,
default_q = 1;
}
 
+   /* disabling the default push mode for LS1043 */
+   if (dpaa_svr_family == SVR_LS1043A_FAMILY)
+   dpaa_push_mode_max_queue = 0;
+
+   /* Check if push mode queues are to be enabled. Currently we allow
+* only one queue per thread.
+*/
+   if (getenv("DPAA_PUSH_QUEUES_NUMBER")) {
+   dpaa_push_mode_max_queue =
+   atoi(getenv("DPAA_PUSH_QUEUES_NUMBER"));
+   if (dpaa_push_mode_max_queue > DPAA_MAX_PUSH_MODE_QUEUE)
+   dpaa_push_mode_max_queue = DPAA_MAX_PUSH_MODE_QUEUE;
+   }
+
is_global_init = 1;
}
 
-- 
2.7.4



[dpdk-dev] [PATCH v3 15/16] bus/dpaa: add support for SG config

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
---
 drivers/bus/dpaa/base/fman/fman_hw.c  | 42 +++
 drivers/bus/dpaa/include/fsl_fman.h   |  6 +
 drivers/bus/dpaa/rte_bus_dpaa_version.map |  2 ++
 3 files changed, 50 insertions(+)

diff --git a/drivers/bus/dpaa/base/fman/fman_hw.c 
b/drivers/bus/dpaa/base/fman/fman_hw.c
index 7ada7fa..4ebbc3d 100644
--- a/drivers/bus/dpaa/base/fman/fman_hw.c
+++ b/drivers/bus/dpaa/base/fman/fman_hw.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 
+#define FMAN_SP_SG_DISABLE  0x80000000
 #define FMAN_SP_EXT_BUF_MARG_START_SHIFT	16
 
 /* Instantiate the global variable that the inline CRC64 implementation (in
@@ -538,6 +539,47 @@ fman_if_get_maxfrm(struct fman_if *fm_if)
	return (in_be32(reg_maxfrm) & 0x0000FFFF);
 }
 
+/* MSB in fmbm_rebm register
+ * 0 - If BMI cannot store the frame in a single buffer it may select a buffer
+ * of smaller size and store the frame in scatter gather (S/G) buffers
+ * 1 - Scatter gather format is not enabled for frame storage. If BMI cannot
+ * store the frame in a single buffer, the frame is discarded.
+ */
+
+int
+fman_if_get_sg_enable(struct fman_if *fm_if)
+{
+   u32 fmbm_rebm;
+
+   struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
+
+   assert(fman_ccsr_map_fd != -1);
+
+   fmbm_rebm = in_be32(&((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm);
+
+   return (fmbm_rebm & FMAN_SP_SG_DISABLE) ? 0 : 1;
+}
+
+void
+fman_if_set_sg(struct fman_if *fm_if, int enable)
+{
+   struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
+   unsigned int *fmbm_rebm;
+   int val;
+   int fmbm_mask = FMAN_SP_SG_DISABLE;
+
+   if (enable)
+   val = 0;
+   else
+   val = FMAN_SP_SG_DISABLE;
+
+   assert(fman_ccsr_map_fd != -1);
+
+   fmbm_rebm = &((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm;
+
+   out_be32(fmbm_rebm, (in_be32(fmbm_rebm) & ~fmbm_mask) | val);
+}
+
 void
 fman_if_set_dnia(struct fman_if *fm_if, uint32_t nia)
 {
diff --git a/drivers/bus/dpaa/include/fsl_fman.h 
b/drivers/bus/dpaa/include/fsl_fman.h
index c0ef1bf..1d1ce86 100644
--- a/drivers/bus/dpaa/include/fsl_fman.h
+++ b/drivers/bus/dpaa/include/fsl_fman.h
@@ -108,6 +108,12 @@ int fman_if_get_fdoff(struct fman_if *fm_if);
 /* Set interface fd->offset value */
 void fman_if_set_fdoff(struct fman_if *fm_if, uint32_t fd_offset);
 
+/* Get interface SG enable status value */
+int fman_if_get_sg_enable(struct fman_if *fm_if);
+
+/* Set interface SG support mode */
+void fman_if_set_sg(struct fman_if *fm_if, int enable);
+
 /* Get interface Max Frame length (MTU) */
 uint16_t fman_if_get_maxfrm(struct fman_if *fm_if);
 
diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map 
b/drivers/bus/dpaa/rte_bus_dpaa_version.map
index e00c911..7d6d624 100644
--- a/drivers/bus/dpaa/rte_bus_dpaa_version.map
+++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
@@ -96,6 +96,8 @@ DPDK_18.02 {
 DPDK_18.08 {
global:
 
+   fman_if_get_sg_enable;
+   fman_if_set_sg;
of_get_mac_address;
 
local: *;
-- 
2.7.4



[dpdk-dev] [PATCH v3 16/16] net/dpaa: implement scatter offload support

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/net/dpaa/dpaa_ethdev.c | 75 +-
 drivers/net/dpaa/dpaa_ethdev.h |  3 +-
 drivers/net/dpaa/dpaa_rxtx.c   |  4 +--
 drivers/net/dpaa/dpaa_rxtx.h   |  2 --
 4 files changed, 70 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index 5c0aafb..c49d3a5 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -47,7 +47,8 @@
 
 /* Supported Rx offloads */
 static uint64_t dev_rx_offloads_sup =
-   DEV_RX_OFFLOAD_JUMBO_FRAME;
+   DEV_RX_OFFLOAD_JUMBO_FRAME |
+   DEV_RX_OFFLOAD_SCATTER;
 
 /* Rx offloads which cannot be disabled */
 static uint64_t dev_rx_offloads_nodis =
@@ -55,8 +56,7 @@ static uint64_t dev_rx_offloads_nodis =
DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM |
DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
-   DEV_RX_OFFLOAD_CRC_STRIP |
-   DEV_RX_OFFLOAD_SCATTER;
+   DEV_RX_OFFLOAD_CRC_STRIP;
 
 /* Supported Tx offloads */
 static uint64_t dev_tx_offloads_sup;
@@ -148,11 +148,30 @@ dpaa_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
struct dpaa_if *dpaa_intf = dev->data->dev_private;
uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN
+ VLAN_TAG_SIZE;
+   uint32_t buffsz = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
 
PMD_INIT_FUNC_TRACE();
 
if (mtu < ETHER_MIN_MTU || frame_size > DPAA_MAX_RX_PKT_LEN)
return -EINVAL;
+   /*
+* Refuse mtu that requires the support of scattered packets
+* when this feature has not been enabled before.
+*/
+   if (dev->data->min_rx_buf_size &&
+   !dev->data->scattered_rx && frame_size > buffsz) {
+   DPAA_PMD_ERR("SG not enabled, will not fit in one buffer");
+   return -EINVAL;
+   }
+
+   /* check buffsz * DPAA_SGT_MAX_ENTRIES >= max_frame */
+   if (dev->data->min_rx_buf_size && dev->data->scattered_rx &&
+   (frame_size > buffsz * DPAA_SGT_MAX_ENTRIES)) {
+   DPAA_PMD_ERR("Too big to fit for Max SG list %d",
+   buffsz * DPAA_SGT_MAX_ENTRIES);
+   return -EINVAL;
+   }
+
if (frame_size > ETHER_MAX_LEN)
dev->data->dev_conf.rxmode.offloads &=
DEV_RX_OFFLOAD_JUMBO_FRAME;
@@ -196,13 +215,24 @@ dpaa_eth_dev_configure(struct rte_eth_dev *dev)
if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
DPAA_MAX_RX_PKT_LEN) {
+   DPAA_PMD_DEBUG("enabling jumbo");
fman_if_set_maxfrm(dpaa_intf->fif,
dev->data->dev_conf.rxmode.max_rx_pkt_len);
-   return 0;
+   dev->data->mtu =
+   dev->data->dev_conf.rxmode.max_rx_pkt_len -
+   ETHER_HDR_LEN - ETHER_CRC_LEN - VLAN_TAG_SIZE;
} else {
-   return -1;
+   DPAA_PMD_ERR("enabling jumbo err conf max len=%d "
+   "supported is %d",
+   dev->data->dev_conf.rxmode.max_rx_pkt_len,
+   DPAA_MAX_RX_PKT_LEN);
}
}
+   if (rx_offloads & DEV_RX_OFFLOAD_SCATTER) {
+   DPAA_PMD_DEBUG("enabling scatter mode");
+   fman_if_set_sg(dpaa_intf->fif, 1);
+   dev->data->scattered_rx = 1;
+   }
return 0;
 }
 
@@ -300,7 +330,6 @@ static void dpaa_eth_dev_info(struct rte_eth_dev *dev,
 
dev_info->max_rx_queues = dpaa_intf->nb_rx_queues;
dev_info->max_tx_queues = dpaa_intf->nb_tx_queues;
-   dev_info->min_rx_bufsize = DPAA_MIN_RX_BUF_SIZE;
dev_info->max_rx_pktlen = DPAA_MAX_RX_PKT_LEN;
dev_info->max_mac_addrs = DPAA_MAX_MAC_FILTER;
dev_info->max_hash_mac_addrs = 0;
@@ -514,6 +543,7 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
struct qm_mcc_initfq opts = {0};
u32 flags = 0;
int ret;
+   u32 buffsz = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
 
PMD_INIT_FUNC_TRACE();
 
@@ -527,6 +557,27 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, 
uint16_t queue_idx,
DPAA_PMD_INFO("Rx queue setup for queue index: %d fq_id (0x%x)",
queue_idx, rxq->fqid);
 
+   /* Max packet can fit in single buffer */
+   if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= buffsz) {
+   ;
+   } else if (dev->data->dev_conf.rxmode.enable_scatter) {
+   if (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+   buffsz * DPAA_SGT_
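
The size checks added in dpaa_mtu_set() and the Rx queue setup reduce
to a simple rule, sketched standalone below (the DPAA_SGT_MAX_ENTRIES
value is assumed for illustration; buffsz is the mbuf data room minus
headroom, as in the diff):

#include <stdint.h>
#include <stdio.h>

#define DPAA_SGT_MAX_ENTRIES 16 /* assumed S/G list limit */

/* Returns 0 if a frame of frame_size bytes is receivable with the given
 * per-buffer data room, mirroring the checks this patch introduces.
 */
static int check_rx_fit(uint32_t frame_size, uint32_t buffsz, int scattered_rx)
{
	if (frame_size <= buffsz)
		return 0;   /* fits in a single buffer */
	if (!scattered_rx)
		return -1;  /* would need S/G, which is not enabled */
	if (frame_size > buffsz * DPAA_SGT_MAX_ENTRIES)
		return -1;  /* exceeds the maximum S/G chain */
	return 0;
}

int main(void)
{
	/* e.g. 2048-byte mbufs minus 128 bytes of headroom */
	printf("%d\n", check_rx_fit(9000, 1920, 1)); /* 0: fits in S/G */
	printf("%d\n", check_rx_fit(9000, 1920, 0)); /* -1: scatter off */
	return 0;
}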

[dpdk-dev] [PATCH v3 13/16] drivers: support function name in logs trace

2018-07-06 Thread Hemant Agrawal
Signed-off-by: Hemant Agrawal 
Acked-by: Shreyansh Jain 
---
 drivers/bus/fslmc/fslmc_logs.h | 2 +-
 drivers/crypto/dpaa2_sec/dpaa2_sec_logs.h  | 2 +-
 drivers/crypto/dpaa_sec/dpaa_sec_log.h | 2 +-
 drivers/event/dpaa2/dpaa2_eventdev_logs.h  | 2 +-
 drivers/net/dpaa2/dpaa2_pmd_logs.h | 2 +-
 drivers/raw/dpaa2_cmdif/dpaa2_cmdif_logs.h | 2 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma_logs.h   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/bus/fslmc/fslmc_logs.h b/drivers/bus/fslmc/fslmc_logs.h
index 9750b8c..dd74cb7 100644
--- a/drivers/bus/fslmc/fslmc_logs.h
+++ b/drivers/bus/fslmc/fslmc_logs.h
@@ -18,7 +18,7 @@ extern int dpaa2_logtype_bus;
rte_log(RTE_LOG_DEBUG, dpaa2_logtype_bus, "fslmc: %s(): " fmt "\n", \
__func__, ##args)
 
-#define BUS_INIT_FUNC_TRACE() DPAA2_BUS_LOG(DEBUG, " >>")
+#define BUS_INIT_FUNC_TRACE() DPAA2_BUS_DEBUG(" >>")
 
 #define DPAA2_BUS_INFO(fmt, args...) \
DPAA2_BUS_LOG(INFO, fmt, ## args)
diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_logs.h 
b/drivers/crypto/dpaa2_sec/dpaa2_sec_logs.h
index 7c1f5e7..8a99044 100644
--- a/drivers/crypto/dpaa2_sec/dpaa2_sec_logs.h
+++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_logs.h
@@ -18,7 +18,7 @@ extern int dpaa2_logtype_sec;
rte_log(RTE_LOG_DEBUG, dpaa2_logtype_sec, "dpaa2_sec: %s(): " \
fmt "\n", __func__, ##args)
 
-#define PMD_INIT_FUNC_TRACE() DPAA2_SEC_LOG(DEBUG, " >>")
+#define PMD_INIT_FUNC_TRACE() DPAA2_SEC_DEBUG(">>")
 
 #define DPAA2_SEC_INFO(fmt, args...) \
DPAA2_SEC_LOG(INFO, fmt, ## args)
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec_log.h 
b/drivers/crypto/dpaa_sec/dpaa_sec_log.h
index 9784fcb..fb895a8 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec_log.h
+++ b/drivers/crypto/dpaa_sec/dpaa_sec_log.h
@@ -18,7 +18,7 @@ extern int dpaa_logtype_sec;
rte_log(RTE_LOG_DEBUG, dpaa_logtype_sec, "dpaa_sec: %s(): " \
fmt "\n", __func__, ##args)
 
-#define PMD_INIT_FUNC_TRACE() DPAA_SEC_LOG(DEBUG, " >>")
+#define PMD_INIT_FUNC_TRACE() DPAA_SEC_DEBUG(" >>")
 
 #define DPAA_SEC_INFO(fmt, args...) \
DPAA_SEC_LOG(INFO, fmt, ## args)
diff --git a/drivers/event/dpaa2/dpaa2_eventdev_logs.h 
b/drivers/event/dpaa2/dpaa2_eventdev_logs.h
index 48f1abd..a2c2060 100644
--- a/drivers/event/dpaa2/dpaa2_eventdev_logs.h
+++ b/drivers/event/dpaa2/dpaa2_eventdev_logs.h
@@ -16,7 +16,7 @@ extern int dpaa2_logtype_event;
rte_log(RTE_LOG_DEBUG, dpaa2_logtype_event, "dpaa2_event: %s(): " \
fmt "\n", __func__, ##args)
 
-#define EVENTDEV_INIT_FUNC_TRACE() DPAA2_EVENTDEV_LOG(DEBUG, " >>")
+#define EVENTDEV_INIT_FUNC_TRACE() DPAA2_EVENTDEV_DEBUG(" >>")
 
 #define DPAA2_EVENTDEV_INFO(fmt, args...) \
DPAA2_EVENTDEV_LOG(INFO, fmt, ## args)
diff --git a/drivers/net/dpaa2/dpaa2_pmd_logs.h 
b/drivers/net/dpaa2/dpaa2_pmd_logs.h
index 98a4896..c04babd 100644
--- a/drivers/net/dpaa2/dpaa2_pmd_logs.h
+++ b/drivers/net/dpaa2/dpaa2_pmd_logs.h
@@ -16,7 +16,7 @@ extern int dpaa2_logtype_pmd;
rte_log(RTE_LOG_DEBUG, dpaa2_logtype_pmd, "dpaa2_net: %s(): "\
fmt "\n", __func__, ##args)
 
-#define PMD_INIT_FUNC_TRACE() DPAA2_PMD_LOG(DEBUG, " >>")
+#define PMD_INIT_FUNC_TRACE() DPAA2_PMD_DEBUG(">>")
 
 #define DPAA2_PMD_CRIT(fmt, args...) \
DPAA2_PMD_LOG(CRIT, fmt, ## args)
diff --git a/drivers/raw/dpaa2_cmdif/dpaa2_cmdif_logs.h 
b/drivers/raw/dpaa2_cmdif/dpaa2_cmdif_logs.h
index 598a621..8991e83 100644
--- a/drivers/raw/dpaa2_cmdif/dpaa2_cmdif_logs.h
+++ b/drivers/raw/dpaa2_cmdif/dpaa2_cmdif_logs.h
@@ -19,7 +19,7 @@ extern int dpaa2_cmdif_logtype;
rte_log(RTE_LOG_DEBUG, dpaa2_cmdif_logtype, "dpaa2_cmdif: %s(): " \
fmt "\n", __func__, ## args)
 
-#define DPAA2_CMDIF_FUNC_TRACE() DPAA2_CMDIF_LOG(DEBUG, ">>")
+#define DPAA2_CMDIF_FUNC_TRACE() DPAA2_CMDIF_DEBUG(">>")
 
 #define DPAA2_CMDIF_INFO(fmt, args...) \
DPAA2_CMDIF_LOG(INFO, fmt, ## args)
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma_logs.h 
b/drivers/raw/dpaa2_qdma/dpaa2_qdma_logs.h
index fafe352..4779e4c 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma_logs.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma_logs.h
@@ -19,7 +19,7 @@ extern int dpaa2_qdma_logtype;
rte_log(RTE_LOG_DEBUG, dpaa2_qdma_logtype, "dpaa2_qdma: %s(): " \
fmt "\n", __func__, ## args)
 
-#define DPAA2_QDMA_FUNC_TRACE() DPAA2_QDMA_LOG(DEBUG, ">>")
+#define DPAA2_QDMA_FUNC_TRACE() DPAA2_QDMA_DEBUG(">>")
 
 #define DPAA2_QDMA_INFO(fmt, args...) \
DPAA2_QDMA_LOG(INFO, fmt, ## args)
-- 
2.7.4



Re: [dpdk-dev] [PATCH v2 09/16] net/dpaa: support default queue mode

2018-07-06 Thread Hemant Agrawal

On Wednesday 04 July 2018 03:13 PM, Hemant Agrawal wrote:
> In case DPAA FMAN configuration tool (FMC) is not available.
> System can still work with default queue. (1 queue per port).

The commit message needs to reflect the fact that this patch is also fixing
some debugging logs which have no relation to the default queue. Or, the
patch should be split.

Also, the commit message can be rephrased for grammatical correctness:

"In case DPAA FMAN configuration tool is not availble, the system can still 
work with default queues (1 queue per port)."

[Hemant]  OK


Re: [dpdk-dev] [PATCH v2 05/16] net/dpaa2: fix the prefetch Rx to honor nb pkts

2018-07-06 Thread Hemant Agrawal
On Wednesday 04 July 2018 03:13 PM, Hemant Agrawal wrote:
> This patch fix the prefetch rx routine to
 ^
 fixes
> set the next prefetch request to the size of nb_pkts.
> This will assume that next request will ideally will be of same size.

Incorrect wording.
Maybe:
"It assumes that next request would ideally be of same size"

[Hemant] I have taken care of it in v3.


Re: [dpdk-dev] [PATCH v2 15/20] net/mlx5: support inner RSS computation

2018-07-06 Thread Yongseok Koh
On Wed, Jun 27, 2018 at 05:07:47PM +0200, Nelio Laranjeiro wrote:
> Signed-off-by: Nelio Laranjeiro 
> ---
>  drivers/net/mlx5/mlx5_flow.c | 131 +--
>  drivers/net/mlx5/mlx5_rxtx.h |   1 -
>  2 files changed, 96 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 7dda88641..eedf0c461 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -219,6 +219,8 @@ struct rte_flow {
>   struct mlx5_flow_verbs *cur_verbs;
>   /**< Current Verbs flow structure being filled. */
>   struct rte_flow_action_rss rss;/**< RSS context. */
> + uint32_t ptype;
> + /**< Store tunnel packet type data to store in Rx queue. */
>   uint8_t key[40]; /**< RSS hash key. */
>   uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
>  };
> @@ -1320,13 +1322,15 @@ mlx5_flow_action_queue(struct rte_eth_dev *dev,
>   *   Pointer to flow structure.
>   * @param types
>   *   RSS types for this flow (see ETH_RSS_*).
> + * @param level
> + *   RSS level.
>   *
>   * @return
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
>  mlx5_flow_action_rss_verbs_attr(struct rte_eth_dev *dev, struct rte_flow 
> *flow,
> - uint32_t types)
> + uint32_t types, uint32_t level)
>  {
>   const uint32_t layers = mlx5_flow_layers(flow);
>   uint64_t hash_fields;
> @@ -1374,6 +1378,8 @@ mlx5_flow_action_rss_verbs_attr(struct rte_eth_dev 
> *dev, struct rte_flow *flow,
>   hash_fields = 0;
>   priority = 2;
>   }
> + if (hash_fields && level == 2)
> + hash_fields |= IBV_RX_HASH_INNER;
>   flow->cur_verbs->hash_fields = hash_fields;
>   flow->cur_verbs->attr->priority =
>   mlx5_flow_priority(dev, flow->attributes.priority, priority);
> @@ -1416,7 +1422,7 @@ mlx5_flow_action_rss(struct rte_eth_dev *dev,
> RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> &rss->func,
> "RSS hash function not supported");
> - if (rss->level > 1)
> + if (rss->level > 2)
>   return rte_flow_error_set(error, ENOTSUP,
> RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> &rss->level,
> @@ -1456,6 +1462,7 @@ mlx5_flow_action_rss(struct rte_eth_dev *dev,
>   flow->rss.queue_num = rss->queue_num;
>   memcpy(flow->key, rss->key, rss_hash_default_key_len);
>   flow->rss.types = rss->types;
> + flow->rss.level = rss->level;
>   flow->fate |= MLX5_FLOW_FATE_RSS;
>   return 0;
>  }
> @@ -1814,7 +1821,8 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct 
> rte_flow *flow,
>   flow->cur_verbs->attr->priority =
>   flow->attributes.priority;
>   ret = mlx5_flow_action_rss_verbs_attr(dev, flow,
> -   flow->rss.types);
> +   flow->rss.types,
> +   flow->rss.level);
>   if (ret < 0)
>   goto error;
>   LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
> @@ -1828,27 +1836,6 @@ mlx5_flow_merge(struct rte_eth_dev *dev, struct 
> rte_flow *flow,
>   return ret;
>  }
>  
> -/**
> - * Mark the Rx queues mark flag if the flow has a mark or flag modifier.
> - *
> - * @param dev
> - *   Pointer to Ethernet device.
> - * @param flow
> - *   Pointer to flow structure.
> - */
> -static void
> -mlx5_flow_rxq_mark(struct rte_eth_dev *dev, struct rte_flow *flow)
> -{
> - struct priv *priv = dev->data->dev_private;
> - const uint32_t mask = MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK;
> - uint32_t i;
> -
> - if (!(flow->modifier & mask))
> - return;
> - for (i = 0; i != flow->rss.queue_num; ++i)
> - (*priv->rxqs)[(*flow->queue)[i]]->mark = 1;
> -}
> -
>  /**
>   * Validate a flow supported by the NIC.
>   *
> @@ -1978,6 +1965,88 @@ mlx5_flow_fate_apply(struct rte_eth_dev *dev, struct 
> rte_flow *flow,
>   return -rte_errno;
>  }
>  
> +/**
> + * Set the Tunnel packet type and the Mark in the Rx queue.
> + *
> + * @param dev
> + *   Pointer to Ethernet device.
> + * @param idx
> + *   Rx queue index.
> + */
> +static void
> +mlx5_flow_rxq(struct rte_eth_dev *dev, uint16_t idx)
> +{
> + struct priv *priv = dev->data->dev_private;
> + struct rte_flow *flow;
> + const uint32_t mark_m = MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK;
> + uint32_t ptype = 0;
> + uint32_t mark = 0;
> +
> + TAILQ_FOREACH(flow, &priv->flows, next) {
> + unsigned int i;
> +
> + for (i = 0; i != flow->
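
On the application side, the level value handled above comes straight
from the rte_flow RSS action; a hedged sketch of requesting inner RSS
(queues and hash types are placeholders; the struct fields follow the
rte_flow API of this release):

#include <rte_flow.h>

static const uint16_t queues[] = { 0, 1 };

static const struct rte_flow_action_rss rss_conf = {
	.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
	.level = 2,            /* 2 = hash on the inner headers */
	.types = ETH_RSS_IP,
	.queue_num = 2,
	.queue = queues,
};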

Re: [dpdk-dev] [PATCH v2 12/20] net/mlx5: add mark/flag flow action

2018-07-06 Thread Nélio Laranjeiro
On Thu, Jul 05, 2018 at 12:56:09PM -0700, Yongseok Koh wrote:
>[...]
> > > > +   if (mark->id >= MLX5_FLOW_MARK_MAX)
> > > > +   return rte_flow_error_set(error, EINVAL,
> > > > + 
> > > > RTE_FLOW_ERROR_TYPE_ACTION_CONF,
> > > > + &mark->id,
> > > > + "mark must be between 0 and"
> > > > + " 16777199");
> > > 
> > > Use %d and (MLX5_FLOW_MARK_MAX - 1), instead of fixed string.
> > 
> > It needs an snprintf, rte_flow_error_set() does not accept formatting
> > strings.
> 
> I think the following would work but never mind. I'm okay with leaving it as 
> is.
> No need to make a change here.
> 
> #define STRINGIFY(x) #x
> #define TOSTRING(x) STRINGIFY(x)
>   "mark must be between 0 and "
>   TOSTRING(MLX5_FLOW_MARK_MAX - 1));
> 

It was to avoid adding a macro, but indeed there is the same kind in
mlx4; I'll port them to mlx5.

> > >[...]
> > Addressing both question here, for the flow_stop() and flow_destroy()
> > the process is different, for the stop, the flow remains with the mark
> > bit set but all queues must me cleared, there is no comparison to make.
> > As you can see, it don't even get a flow, it directly unset the mask bit
> > in the Rx queues.
> > For the destroy the issue is different, several flows may be using the
> > same Rx queues, if one of them will remains and has a mark, then the
> > associated queues must keep their mark bit set.
> > As the process is different, it would end in two distinct functions and
> > each one used by a single function.
> > 
> > For the mlx5_flow_rxq_mark(), the situation is different, the same
> > process is make when a flow is created and the flow are started.
> 
> I knew the differences but I just wanted to ask if having a separate function
> can be a viable option, e.g.,
> 
> mlx5_flow_rxq_mark_set()
> mlx5_flow_rxq_mark_clear()
> mlx5_flow_rxq_mark_trim()

Certainly, the point is those functions have a short life, as a few patches
later they will be removed.
I suppose you prefer to have them, and I don't think it will take too
much time to add such functions; they will be part of the next revision
;).

Thanks,

-- 
Nélio Laranjeiro
6WIND


Re: [dpdk-dev] [PATCH v3 3/4] compressdev: replace mbuf scatter gather flag

2018-07-06 Thread De Lara Guarch, Pablo



> -Original Message-
> From: Verma, Shally [mailto:shally.ve...@cavium.com]
> Sent: Thursday, July 5, 2018 12:59 PM
> To: De Lara Guarch, Pablo ; Gupta, Ashish
> ; Trahe, Fiona ; Daly, Lee
> ; Sahu, Sunila 
> Cc: dev@dpdk.org
> Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter gather flag
> 
> 
> 
> >-Original Message-
> >From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
> >Sent: 05 July 2018 16:56
> >To: Verma, Shally ; Gupta, Ashish
> >; Trahe, Fiona ; Daly,
> >Lee ; Sahu, Sunila 
> >Cc: dev@dpdk.org
> >Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter gather
> >flag
> >
> >External Email
> >
> >> -Original Message-
> >> From: Verma, Shally [mailto:shally.ve...@cavium.com]
> >> Sent: Thursday, July 5, 2018 12:13 PM
> >> To: De Lara Guarch, Pablo ; Gupta,
> >> Ashish ; Trahe, Fiona
> >> ; Daly, Lee ; Sahu, Sunila
> >> 
> >> Cc: dev@dpdk.org
> >> Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter gather
> >> flag
> >>
> >>
> >>
> >> >-Original Message-
> >> >From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
> >> >Sent: 05 July 2018 16:36
> >> >To: Verma, Shally ; Gupta, Ashish
> >> >; Trahe, Fiona ;
> >> >Daly, Lee ; Sahu, Sunila
> >> >
> >> >Cc: dev@dpdk.org
> >> >Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter gather
> >> >flag
> >> >
> >> >External Email
> >> >
> >> >> -Original Message-
> >> >> From: Verma, Shally [mailto:shally.ve...@cavium.com]
> >> >> Sent: Thursday, July 5, 2018 9:39 AM
> >> >> To: De Lara Guarch, Pablo ; Gupta,
> >> >> Ashish ; Trahe, Fiona
> >> >> ; Daly, Lee ; Sahu,
> >> >> Sunila 
> >> >> Cc: dev@dpdk.org
> >> >> Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter
> >> >> gather flag
> >> >>
> >> >>
> >> >>
> >> >> >-Original Message-
> >> >> >From: Pablo de Lara [mailto:pablo.de.lara.gua...@intel.com]
> >> >> >Sent: 04 July 2018 19:41
> >> >> >To: Verma, Shally ; Gupta, Ashish
> >> >> >; fiona.tr...@intel.com;
> >> >> >lee.d...@intel.com
> >> >> >Cc: dev@dpdk.org; Pablo de Lara 
> >> >> >Subject: [PATCH v3 3/4] compressdev: replace mbuf scatter gather
> >> >> >flag
> >> >> >
> >> >> >External Email
> >> >> >
> >> >> >The current mbuf scatter gather feature flag is too ambiguous, as
> >> >> >it is not clear if input and/or output buffers can be scatter
> >> >> >gather mbufs or not.
> >> >> >
> >> >> >Therefore, three new flags will replace this flag:
> >> >> >- RTE_COMP_FF_OOP_SGL_IN_SGL_OUT
> >> >> >- RTE_COMP_FF_OOP_SGL_IN_FB_OUT
> >> >> >- RTE_COMP_FF_OOP_FB_IN_SGL_OUT
> >> >> >
> >> >> [Shally] Believe Out of place is default support on current
> >> >> compression API, so why do we need _OOP_ here?
> >> >
> >> >Hi Shally,
> >> >
> >> >You are right, but I just wanted to clarify that the scenario is for
> >> >Out of place
> >> only.
> >> >
> >> Ok. But that looks redundant to me. Though not likely, tomorrow if
> >> some algo support in-place, Then we will end up adding in_place
> >> equivalent of same. So would prefer to keep naming generic of in/out
> >> place and specific to Scatter- gather in/out support.
> >
> >I think I am not quite following you. Actually, if in the future we
> >support In-place, then it is important to have OOP in the macro, to
> >specify that SGL is supported for Out-of-place and maybe not in-place (like 
> >in
> cryptodev).
> >Otherwise, we would need to break the API, which can be avoided now.
> 
> Ohh okay, now I get it. So these feature flags intend to show input/output 
> mode
> supported specifically for in/out of place operations.  But then still I see 
> having
> OOP isn't required as compression default support is out-of-place and it's 
> just
> making feature name too big. Having in-place is exception and if supported, 
> can
> use convention RTE_COMP_FF_INPLACE_xx

I would still prefer having OOP, to be consistent with cryptodev. It is also 
not that long, it is just 3 letters.

> 
> Above one comment, as I see it, use of FB in
> RTE_COMP_FF_OOP_FB_IN_SGL_OUT didn't give clear indication what it mean.
> May be replace it by RTE_COMP_FF_OOP_DIRECT/LINEAR_IN_SGL_OUT

Linear could be a good option, but it is missing a noun there. What about LB 
(linear buffer), so we keep it short too.

Pablo


Re: [dpdk-dev] [PATCH v3 3/4] compressdev: replace mbuf scatter gather flag

2018-07-06 Thread Verma, Shally



>-Original Message-
>From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
>Sent: 06 July 2018 14:10
>To: Verma, Shally ; Gupta, Ashish 
>; Trahe, Fiona ;
>Daly, Lee ; Sahu, Sunila 
>Cc: dev@dpdk.org
>Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter gather flag
>

//snip

>Linear could be a good option, but it is missing a noun there. What about LB 
>(linear buffer), so we keep it short too.

Ok. LB looks fine. Hopefully that will make it apparent to the reader that
it's the opposite of scatter-gather.

Thanks
Shally

>
>Pablo
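
A minimal sketch of how an application might consume the agreed-upon flags;
the DEFLATE algorithm and the helper name are illustrative assumptions, not
code from this patch set:

#include <rte_compressdev.h>
#include <rte_comp.h>

static int
select_buffer_layout(uint8_t dev_id)
{
	const struct rte_compressdev_capabilities *capab;

	capab = rte_compressdev_capability_get(dev_id, RTE_COMP_ALGO_DEFLATE);
	if (capab == NULL)
		return -1;

	/* prefer multi-segment mbufs on both sides if the PMD allows it */
	if (capab->comp_feature_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT)
		return 2;
	/* otherwise, multi-segment input with a linear output buffer */
	if (capab->comp_feature_flags & RTE_COMP_FF_OOP_SGL_IN_LB_OUT)
		return 1;
	/* fall back to linear buffers on both sides */
	return 0;
}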


Re: [dpdk-dev] [PATCH v3 3/4] compressdev: replace mbuf scatter gather flag

2018-07-06 Thread De Lara Guarch, Pablo



> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Verma, Shally
> Sent: Friday, July 6, 2018 9:53 AM
> To: De Lara Guarch, Pablo ; Gupta, Ashish
> ; Trahe, Fiona ; Daly, Lee
> ; Sahu, Sunila 
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 3/4] compressdev: replace mbuf scatter
> gather flag
> 
> 
> 
> >-Original Message-
> >From: De Lara Guarch, Pablo [mailto:pablo.de.lara.gua...@intel.com]
> >Sent: 06 July 2018 14:10
> >To: Verma, Shally ; Gupta, Ashish
> >; Trahe, Fiona ; Daly,
> >Lee ; Sahu, Sunila 
> >Cc: dev@dpdk.org
> >Subject: RE: [PATCH v3 3/4] compressdev: replace mbuf scatter gather
> >flag
> >
> 
> //snip
> 
> >Linear could be a good option, but it is missing a noun there. What about LB
> >(linear buffer), so we keep it short too.
> 
> Ok. LB looks fine. Hopefully that will make it apparent to the reader that
> it's the opposite of scatter-gather.

I can clarify a bit more in the comments.

Thanks,
Pablo

> 
> Thanks
> Shally
> 
> >
> >Pablo


Re: [dpdk-dev] [PATCH] mk: using initial-exec model for thread local variable

2018-07-06 Thread Bruce Richardson
On Fri, Jul 06, 2018 at 02:22:14AM +, Liu, Yong wrote:
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Sachin Saxena
> > Sent: Thursday, July 05, 2018 10:46 PM
> > To: Liu, Yong ; Yang, Zhiyong ;
> > tho...@monjalon.net; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] mk: using initial-exec model for thread
> > local variable
> > 
> > 
> > 
> > >
> > > When building a shared library, the thread-local storage model is
> > > changed to global-dynamic, which adds extra cost to every read of a
> > > thread-local variable. On the other hand, dynamically loading a shared
> > > library with static TLS requests an additional DTV slot, which is
> > > limited by the loader. For now, only librte_pmd_eal.so contains
> > > thread-local variables, so the TLS model can be set back to
> > > initial-exec, as with a static library, for better performance.
> > >
> > > Signed-off-by: Marvin Liu 
> > >
> > > diff --git a/mk/toolchain/gcc/rte.vars.mk b/mk/toolchain/gcc/rte.vars.mk
> > > index 7e4531bab..19d5e11ef 100644
> > > --- a/mk/toolchain/gcc/rte.vars.mk
> > > +++ b/mk/toolchain/gcc/rte.vars.mk
> > > @@ -43,6 +43,13 @@ ifeq (,$(findstring -O0,$(EXTRA_CFLAGS)))  endif
> > endif
> > >
> > > +# The initial-exec TLS model has better performance compared to
> > > +# global-dynamic. But this model requires an additional DTV slot when
> > > +# dlopen()ing an object with thread-local variables.
> > > +ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y)
> > > +TOOLCHAIN_CFLAGS += -ftls-model=initial-exec
> > > +endif
> > > +
> > 
> > [Sachin Saxena]   Using the initial-exec model for a shared object is not
> > recommended. If you link a shared object containing IE-model TLS, the
> > object will have the DF_STATIC_TLS flag set. By the spec, this means that
> > dlopen() might refuse to load it if its TLS usage is greater than the
> > static TLS space.
> > This is what happens: when I tried to validate this change on an
> > ARM64-based NXP platform with the VPP-DPDK solution, VPP initialization
> > failed with the following error:
> >   "load_one_plugin:145: /usr/lib/vpp_plugins/dpdk_plugin.so: cannot
> > allocate memory in static TLS block"
> > 
> > Note that the DPDK dpaa2 driver and VPP both use TLS variables quite
> > significantly. When the initial-exec model is forced in the DPDK shared
> > object, VPP's static TLS space gets exhausted and dlopen() returns an
> > error while trying to load the DPDK object.
> > For the same reason, when we use "-fPIC" the default TLS model changes
> > from "initial-exec" to "global-dynamic".
> > 
> > In my opinion, this change should not be merged, as it breaks basic
> > functionality.
> 
> Thanks for your opinion, Sachin.
> The IE model may cause problems when using dlopen() to open a shared
> object. On the other hand, it can benefit performance.
> It is better to keep the current workable setting; users may change it
> themselves if needed.
> 
What is the performance delta, and where is it most seen? I suggest, for
future patches like this, that the commit message itself give a
rough/approximate indication of the perf impact.

/Bruce
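
The difference between the two models can be illustrated with a tiny
experiment; this sketch is not part of the patch, and the file and function
names are made up:

/* tls_demo.c - build the same file with both models and compare:
 *
 *   gcc -O2 -fPIC -shared -ftls-model=global-dynamic tls_demo.c -o gd.so
 *   gcc -O2 -fPIC -shared -ftls-model=initial-exec   tls_demo.c -o ie.so
 *
 * With global-dynamic, each access resolves the address through
 * __tls_get_addr(); with initial-exec, the variable sits at a fixed
 * offset from the thread pointer, but the object gets DF_STATIC_TLS
 * set, so dlopen() may refuse it when the static TLS block is
 * exhausted - the VPP failure reported above.
 */
static __thread unsigned long tls_counter;

unsigned long
tls_bump(void)
{
	return ++tls_counter;	/* the TLS access whose cost differs */
}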


Re: [dpdk-dev] [PATCH v2 1/6] hash: make duplicated code into functions

2018-07-06 Thread De Lara Guarch, Pablo
Hi Yipeng,

> -Original Message-
> From: Wang, Yipeng1
> Sent: Friday, June 29, 2018 1:25 PM
> To: De Lara Guarch, Pablo 
> Cc: dev@dpdk.org; Wang, Yipeng1 ; Richardson,
> Bruce ; honnappa.nagaraha...@arm.com;
> vgu...@caviumnetworks.com; brijesh.s.si...@gmail.com
> Subject: [PATCH v2 1/6] hash: make duplicated code into functions
> 
> This commit refactors the hash table lookup/add/del code to remove some
> code duplication. Processing on the primary bucket can also be applied to
> the secondary bucket with the same code.
> 
> Signed-off-by: Yipeng Wang 
> ---
>  lib/librte_hash/rte_cuckoo_hash.c | 186 
> ++

...

> +/* Search one bucket to find the match key */
>  static inline int32_t
> -__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> - hash_sig_t sig, void **data)
> +search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
> + void **data, struct rte_hash_bucket *bkt)

Use "const" in "struct rte_hash_bucket".

...

> +search_and_remove(const struct rte_hash *h, const void *key,
> + struct rte_hash_bucket *bkt, hash_sig_t sig,
> + int32_t *ret_val)
>  {
> - uint32_t bucket_idx;
> - hash_sig_t alt_hash;
> - unsigned i;
> - struct rte_hash_bucket *bkt;
>   struct rte_hash_key *k, *keys = h->key_store;
> - int32_t ret;
> -
> - bucket_idx = sig & h->bucket_bitmask;
> - bkt = &h->buckets[bucket_idx];
> + unsigned int i;
> 
>   /* Check if key is in primary location */
>   for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { @@ -833,37 +825,39
> @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
>* Return index where key is stored,
>* subtracting the first dummy index
>*/
> - ret = bkt->key_idx[i] - 1;
> + *ret_val = bkt->key_idx[i] - 1;
>   bkt->key_idx[i] = EMPTY_SLOT;
> - return ret;
> + return 0;

You can store ret_val and return it, instead of returning 0,
so the function is similar to the other search functions.


>   }
>   }
>   }
> + return -1;
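
A sketch of the shape being suggested here, returning the key index directly
like the other search helpers; the internal names follow the existing
rte_cuckoo_hash.c code, and the slot-cleanup bookkeeping is elided, so this
is illustrative rather than the merged code:

static inline int32_t
search_and_remove(const struct rte_hash *h, const void *key,
		struct rte_hash_bucket *bkt, hash_sig_t sig)
{
	struct rte_hash_key *k, *keys = h->key_store;
	unsigned int i;
	int32_t ret;

	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
		if (bkt->sig_current[i] == sig &&
				bkt->key_idx[i] != EMPTY_SLOT) {
			k = (struct rte_hash_key *) ((char *)keys +
					bkt->key_idx[i] * h->key_entry_size);
			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
				/* index stored minus the first dummy slot */
				ret = bkt->key_idx[i] - 1;
				bkt->key_idx[i] = EMPTY_SLOT;
				return ret;
			}
		}
	}
	return -1;	/* key not found in this bucket */
}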



Re: [dpdk-dev] [PATCH v2] librte_lpm: Improve performance of the delete and add functions

2018-07-06 Thread Bruce Richardson
On Mon, Jul 02, 2018 at 07:42:11PM +0300, Alex Kiselev wrote:
> There are two major problems with the library:
> first, the whole LPM tree is rebuilt when a rule is
> deleted, even though there is no need to, and second,
> due to the current rules algorithm with complexity
> O(n) it's almost impossible to deal with large rule
> sets (50k or so rules).
> This patch addresses those two issues.
> 
> Signed-off-by: Alex Kiselev 
> ---
>  lib/librte_lpm/rte_lpm6.c | 1073 
> ++---
>  1 file changed, 816 insertions(+), 257 deletions(-)
> 

I get a compiler error with gcc8 after this patch:

/home/bruce/dpdk.org/lib/librte_lpm/rte_lpm6.c: In function 
‘rte_lpm6_add_v1705’:
/home/bruce/dpdk.org/lib/librte_lpm/rte_lpm6.c:748:18: error: ‘tbl_next_num’ 
may be used uninitialized in this function [-Werror=maybe-uninitialized]
lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
^

"check-git-log.sh" and "checkpatches.sh" are also reporting issues with
this patch. Please check these too.

Some code review comments to follow.

Regards,
/Bruce


Re: [dpdk-dev] [PATCH v2] librte_lpm: Improve performance of the delete and add functions

2018-07-06 Thread Bruce Richardson
On Mon, Jul 02, 2018 at 07:42:11PM +0300, Alex Kiselev wrote:
> There are two major problems with the library:
> first, the whole LPM tree is rebuilt when a rule is
> deleted, even though there is no need to, and second,
> due to the current rules algorithm with complexity
> O(n) it's almost impossible to deal with large rule
> sets (50k or so rules).
> This patch addresses those two issues.
> 
> Signed-off-by: Alex Kiselev 
> ---
>  lib/librte_lpm/rte_lpm6.c | 1073 
> ++---
>  1 file changed, 816 insertions(+), 257 deletions(-)
> 
> diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
> index 149677eb1..438db0831 100644
> --- a/lib/librte_lpm/rte_lpm6.c
> +++ b/lib/librte_lpm/rte_lpm6.c
> @@ -21,6 +21,10 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
> +#include 

For correct compilation, you now need to specify the dependency on the hash
and mempool libraries in both make and meson build files. I believe
something like this should do the trick:

--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,7 +47,7 @@ DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
 DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
-DEPDIRS-librte_lpm := librte_eal
+DEPDIRS-librte_lpm := librte_eal librte_mempool librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DEPDIRS-librte_acl := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member

--- a/lib/librte_lpm/meson.build
+++ b/lib/librte_lpm/meson.build
@@ -7,3 +7,4 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
 headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+deps += ['mempool', 'hash']



Re: [dpdk-dev] [PATCH v2] librte_lpm: Improve performance of the delete and add functions

2018-07-06 Thread Bruce Richardson
On Fri, Jul 06, 2018 at 11:13:53AM +0100, Bruce Richardson wrote:
> On Mon, Jul 02, 2018 at 07:42:11PM +0300, Alex Kiselev wrote:
> > There are two major problems with the library:
> > first, the whole LPM tree is rebuilt when a rule is
> > deleted, even though there is no need to, and second,
> > due to the current rules algorithm with complexity
> > O(n) it's almost impossible to deal with large rule
> > sets (50k or so rules).
> > This patch addresses those two issues.
> > 
> > Signed-off-by: Alex Kiselev 
> > ---
> >  lib/librte_lpm/rte_lpm6.c | 1073 
> > ++---
> >  1 file changed, 816 insertions(+), 257 deletions(-)
> > 
> 
> I get a compiler error with gcc8 after this patch:
> 
> /home/bruce/dpdk.org/lib/librte_lpm/rte_lpm6.c: In function 
> ‘rte_lpm6_add_v1705’:
> /home/bruce/dpdk.org/lib/librte_lpm/rte_lpm6.c:748:18: error: ‘tbl_next_num’ 
> may be used uninitialized in this function [-Werror=maybe-uninitialized]
> lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
> ^
> 

On fixing this, clang errors show up thereafter. I suggest using
"test-meson-builds.sh" to sanity check compile on the patch.

Thanks,
/Bruce

ccache clang -Ilib/lib@@rte_lpm@sta -Ilib -I../lib -Ilib/librte_lpm 
-I../lib/librte_lpm -I. -I../ -Iconfig -I../config -Ilib/librte_eal/common 
-I../lib/librte_eal/common -Ilib/librte_eal/common/include 
-I../lib/librte_eal/common/include -Ilib/librte_eal/common/include/arch/x86 
-I../lib/librte_eal/common/include/arch/x86 
-I../lib/librte_eal/linuxapp/eal/include 
-Ilib/librte_eal/linuxapp/eal/../../../librte_compat 
-I../lib/librte_eal/linuxapp/eal/../../../librte_compat -Ilib/librte_eal 
-I../lib/librte_eal -Ilib/librte_compat -I../lib/librte_compat 
-Ilib/librte_mempool -I../lib/librte_mempool -Ilib/librte_ring 
-I../lib/librte_ring -Ilib/librte_hash -I../lib/librte_hash -Xclang 
-fcolor-diagnostics -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror 
-O3 -include rte_config.h -Wsign-compare -Wcast-qual 
-Wno-address-of-packed-member -fPIC -march=native  -MD -MQ 
'lib/lib@@rte_lpm@sta/librte_lpm_rte_lpm6.c.o' -MF 
'lib/lib@@rte_lpm@sta/librte_lpm_rte_lpm6.c.o.d' -o 
'lib/lib@@rte_lpm@sta/librte_lpm_rte_lpm6.c.o' -c ../lib/librte_lpm/rte_lpm6.c
../lib/librte_lpm/rte_lpm6.c:233:58: error: cast from 'struct rte_lpm6_rule **' 
to 'const void **' must have all intermediate pointers const qualified to be 
safe [-Werror,-Wcast-qual]
while (rte_hash_iterate(lpm->rules_tbl, (const void **) &rule_key,
^
../lib/librte_lpm/rte_lpm6.c:247:58: error: cast from 'struct rte_lpm6_rule **' 
to 'const void **' must have all intermediate pointers const qualified to be 
safe [-Werror,-Wcast-qual]
while (rte_hash_iterate(lpm->rules_tbl, (const void **) &rule_key,
^
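
One possible way to avoid the -Wcast-qual error (a sketch, not necessarily
the fix that was merged) is to iterate with a plain `const void *` key and
convert the pointee afterwards, so no pointer-to-pointer cast is needed;
this assumes read-only access to the rule is sufficient at these call sites:

const void *next_key;
void *next_data;
uint32_t iter = 0;

while (rte_hash_iterate(lpm->rules_tbl, &next_key, &next_data, &iter) >= 0) {
	const struct rte_lpm6_rule *rule = next_key;
	/* ... process rule / next_data ... */
}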



Re: [dpdk-dev] [PATCH v4 00/23] net/softnic: refactoring

2018-07-06 Thread Dumitrescu, Cristian


> -Original Message-
> From: Singh, Jasvinder
> Sent: Thursday, July 5, 2018 4:48 PM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian 
> Subject: [PATCH v4 00/23] net/softnic: refactoring
> 
> This patch set modifies the Soft NIC device driver to use the Packet
> Framework, which makes it much more modular, flexible and extensible
> with new functionality.
> 
> 
Applied to next-pipeline, thanks!



Re: [dpdk-dev] [PATCH v2] librte_lpm: Improve performance of the delete and add functions

2018-07-06 Thread Bruce Richardson
On Mon, Jul 02, 2018 at 07:42:11PM +0300, Alex Kiselev wrote:
> There are two major problems with the library:
> first, the whole LPM tree is rebuilt when a rule is
> deleted, even though there is no need to, and second,
> due to the current rules algorithm with complexity
> O(n) it's almost impossible to deal with large rule
> sets (50k or so rules).
> This patch addresses those two issues.
> 
> Signed-off-by: Alex Kiselev 
> ---
>  lib/librte_lpm/rte_lpm6.c | 1073 
> ++---
>  1 file changed, 816 insertions(+), 257 deletions(-)
> 
The lpm6_autotest is now giving me an error when I run it, which wasn't
there before, though interestingly the test is still passing overall, which
seems wrong:

RTE>>lpm6_autotest
# test 00
# test 01
LPM: LPM rules mempool allocation failed: Unknown error 17 (17)# test 02
# test 03
...

On the other hand, the performance numbers, especially for delete, look far
better:

Before:
Average LPM Add: 531220 cycles
Average LPM Lookup: 41.7 cycles (fails = 0.0%)
BULK LPM Lookup: 33.8 cycles (fails = 0.0%)
Average LPM Delete: 1.41825e+08 cycles

After:
Average LPM Add: 487116 cycles
Average LPM Lookup: 41.7 cycles (fails = 0.0%)
BULK LPM Lookup: 33.3 cycles (fails = 0.0%)
Average LPM Delete: 3.65125e+06 cycles

/Bruce


[dpdk-dev] [PATCH v4 4/4] compressdev: add huffman encoding flags

2018-07-06 Thread Pablo de Lara
Added Huffman fixed and dynamic encoding feature flags,
so an application can query if a device supports
these two types when performing DEFLATE compression.

Signed-off-by: Pablo de Lara 
Acked-by: Fiona Trahe 
---

v4/v3:
- No change

v2:
- Fixed typo

 drivers/compress/isal/isal_compress_pmd_ops.c |  4 +++-
 lib/librte_compressdev/rte_comp.c |  4 
 lib/librte_compressdev/rte_comp.h |  4 
 test/test/test_compressdev.c  | 16 
 4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/compress/isal/isal_compress_pmd_ops.c 
b/drivers/compress/isal/isal_compress_pmd_ops.c
index 970a0413b..585f22802 100644
--- a/drivers/compress/isal/isal_compress_pmd_ops.c
+++ b/drivers/compress/isal/isal_compress_pmd_ops.c
@@ -12,7 +12,9 @@
 static const struct rte_compressdev_capabilities isal_pmd_capabilities[] = {
{
.algo = RTE_COMP_ALGO_DEFLATE,
-   .comp_feature_flags =   RTE_COMP_FF_SHAREABLE_PRIV_XFORM,
+   .comp_feature_flags =   RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
+   RTE_COMP_FF_HUFFMAN_FIXED |
+   RTE_COMP_FF_HUFFMAN_DYNAMIC,
.window_size = {
.min = 15,
.max = 15,
diff --git a/lib/librte_compressdev/rte_comp.c 
b/lib/librte_compressdev/rte_comp.c
index 97ea0d922..98ad0cfd9 100644
--- a/lib/librte_compressdev/rte_comp.c
+++ b/lib/librte_compressdev/rte_comp.c
@@ -36,6 +36,10 @@ rte_comp_get_feature_name(uint64_t flag)
return "SHA2_SHA256_HASH";
case RTE_COMP_FF_SHAREABLE_PRIV_XFORM:
return "SHAREABLE_PRIV_XFORM";
+   case RTE_COMP_FF_HUFFMAN_FIXED:
+   return "HUFFMAN_FIXED";
+   case RTE_COMP_FF_HUFFMAN_DYNAMIC:
+   return "HUFFMAN_DYNAMIC";
default:
return NULL;
}
diff --git a/lib/librte_compressdev/rte_comp.h 
b/lib/librte_compressdev/rte_comp.h
index 274b5eadf..1f66945ee 100644
--- a/lib/librte_compressdev/rte_comp.h
+++ b/lib/librte_compressdev/rte_comp.h
@@ -63,6 +63,10 @@ extern "C" {
  * to create as many priv_xforms as it expects to have stateless
  * operations in-flight.
  */
+#define RTE_COMP_FF_HUFFMAN_FIXED  (1ULL << 13)
+/**< Fixed huffman encoding is supported */
+#define RTE_COMP_FF_HUFFMAN_DYNAMIC(1ULL << 14)
+/**< Dynamic huffman encoding is supported */
 
 /** Status of comp operation */
 enum rte_comp_op_status {
diff --git a/test/test/test_compressdev.c b/test/test/test_compressdev.c
index 640942bac..f960963a4 100644
--- a/test/test/test_compressdev.c
+++ b/test/test/test_compressdev.c
@@ -846,6 +846,14 @@ test_compressdev_deflate_stateless_fixed(void)
const char *test_buffer;
uint16_t i;
int ret;
+   const struct rte_compressdev_capabilities *capab;
+
+   capab = rte_compressdev_capability_get(0, RTE_COMP_ALGO_DEFLATE);
+   TEST_ASSERT(capab != NULL, "Failed to retrieve device capabilities");
+
+   if ((capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0)
+   return -ENOTSUP;
+
struct rte_comp_xform *compress_xform =
rte_malloc(NULL, sizeof(struct rte_comp_xform), 0);
 
@@ -905,6 +913,14 @@ test_compressdev_deflate_stateless_dynamic(void)
struct rte_comp_xform *compress_xform =
rte_malloc(NULL, sizeof(struct rte_comp_xform), 0);
 
+   const struct rte_compressdev_capabilities *capab;
+
+   capab = rte_compressdev_capability_get(0, RTE_COMP_ALGO_DEFLATE);
+   TEST_ASSERT(capab != NULL, "Failed to retrieve device capabilities");
+
+   if ((capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0)
+   return -ENOTSUP;
+
if (compress_xform == NULL) {
RTE_LOG(ERR, USER1,
"Compress xform could not be created\n");
-- 
2.14.4
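
For illustration only (this snippet is not part of the patch): with these
flags, an application can pick the Huffman mode at runtime instead of
failing on an unsupported device, assuming `capab` was obtained via
rte_compressdev_capability_get() as in the tests above:

struct rte_comp_xform xform = { .type = RTE_COMP_COMPRESS };

xform.compress.algo = RTE_COMP_ALGO_DEFLATE;
/* fall back to fixed Huffman when dynamic is not supported */
xform.compress.deflate.huffman =
	(capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) ?
		RTE_COMP_HUFFMAN_DYNAMIC : RTE_COMP_HUFFMAN_FIXED;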



[dpdk-dev] [PATCH v4 2/4] doc: rename compress feature flag

2018-07-06 Thread Pablo de Lara
Renamed feature "Bypass" to "Pass-through",
as it is a more explicit name, meaning that the PMD
is capable of passing the mbufs through it,
without making any modifications (i.e. the NULL algorithm).

Signed-off-by: Pablo de Lara 
Acked-by: Fiona Trahe 
---

v4:
- Rephrased pass-through feature comment (Shally)

 doc/guides/compressdevs/features/default.ini | 2 +-
 doc/guides/compressdevs/overview.rst | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/guides/compressdevs/features/default.ini 
b/doc/guides/compressdevs/features/default.ini
index 795fc5577..a88414d23 100644
--- a/doc/guides/compressdevs/features/default.ini
+++ b/doc/guides/compressdevs/features/default.ini
@@ -13,7 +13,7 @@ CPU AVX2   =
 CPU AVX512 =
 CPU NEON   =
 Stateful   =
-By-Pass=
+Pass-through   =
 Chained mbufs  =
 Deflate=
 LZS=
diff --git a/doc/guides/compressdevs/overview.rst 
b/doc/guides/compressdevs/overview.rst
index ca37de175..d01c1a966 100644
--- a/doc/guides/compressdevs/overview.rst
+++ b/doc/guides/compressdevs/overview.rst
@@ -10,3 +10,9 @@ Supported Feature Flags
 .. _table_compression_pmd_features:
 
 .. include:: overview_feature_table.txt
+
+.. Note::
+
+   - "Pass-through" feature flag refers to the ability of the PMD
+ to let input buffers pass-through it, copying the input to the output,
+ without making any modifications to it (no compression done).
-- 
2.14.4
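
The pass-through behaviour described in the note above corresponds to the
NULL algorithm; a minimal sketch of such a transform (illustrative, not
taken from the patch):

#include <rte_comp.h>

/* copies input to output unmodified - no compression done */
static const struct rte_comp_xform passthrough_xform = {
	.type = RTE_COMP_COMPRESS,
	.compress = {
		.algo = RTE_COMP_ALGO_NULL,
		.chksum = RTE_COMP_CHECKSUM_NONE,
	},
};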



[dpdk-dev] [PATCH v4 1/4] doc: cleanup ISA-L PMD feature matrix

2018-07-06 Thread Pablo de Lara
In PMD feature matrices (.ini files), it is not required to
have the list of features that are not supported,
just the ones that are.

Signed-off-by: Pablo de Lara 
Acked-by: Lee Daly 
---

v4:
- No change

 doc/guides/compressdevs/features/isal.ini | 8 
 1 file changed, 8 deletions(-)

diff --git a/doc/guides/compressdevs/features/isal.ini 
b/doc/guides/compressdevs/features/isal.ini
index ad2718df0..1d4ff1c41 100644
--- a/doc/guides/compressdevs/features/isal.ini
+++ b/doc/guides/compressdevs/features/isal.ini
@@ -9,14 +9,6 @@ CPU SSE= Y
 CPU AVX= Y
 CPU AVX2   = Y
 CPU AVX512 = Y
-CPU NEON   =
-Stateful   =
-By-Pass=
-Chained mbufs  =
 Deflate= Y
-LZS=
-Adler32=
-Crc32  =
-Adler32&Crc32  =
 Fixed  = Y
 Dynamic= Y
-- 
2.14.4



[dpdk-dev] [PATCH v4 3/4] compressdev: replace mbuf scatter gather flag

2018-07-06 Thread Pablo de Lara
The current mbuf scatter gather feature flag is
too ambiguous, as it is not clear if input and/or output
buffers can be scatter gather mbufs or not.

Therefore, three new flags will replace this flag:
- RTE_COMP_FF_OOP_SGL_IN_SGL_OUT
- RTE_COMP_FF_OOP_SGL_IN_FB_OUT
- RTE_COMP_FF_OOP_LB_IN_SGL_OUT

Note that out-of-place flat buffers are supported by default
and in-place is not supported by the library.

Signed-off-by: Pablo de Lara 
Acked-by: Fiona Trahe 
---

v4:
- Replaced FB (Flat Buffers) with LB (Linear Buffers) (Shally)
- Add extra explanation on comments about Linear Buffers vs
  Scatter-gather lists

v3:
- Replaced Out-of-place with OOP
- Added new feature flags in default.ini

v2:
- Fixed typos
- Rephrased comments

 doc/guides/compressdevs/features/default.ini | 34 +++-
 doc/guides/compressdevs/overview.rst | 14 
 doc/guides/rel_notes/release_18_08.rst   |  6 +
 lib/librte_compressdev/rte_comp.c|  8 +--
 lib/librte_compressdev/rte_comp.h| 31 +
 5 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/doc/guides/compressdevs/features/default.ini 
b/doc/guides/compressdevs/features/default.ini
index a88414d23..829e4df61 100644
--- a/doc/guides/compressdevs/features/default.ini
+++ b/doc/guides/compressdevs/features/default.ini
@@ -6,19 +6,21 @@
 ; the features table in the documentation.
 ;
 [Features]
-HW Accelerated =
-CPU SSE=
-CPU AVX=
-CPU AVX2   =
-CPU AVX512 =
-CPU NEON   =
-Stateful   =
-Pass-through   =
-Chained mbufs  =
-Deflate=
-LZS=
-Adler32=
-Crc32  =
-Adler32&Crc32  =
-Fixed  =
-Dynamic=
+HW Accelerated  =
+CPU SSE =
+CPU AVX =
+CPU AVX2=
+CPU AVX512  =
+CPU NEON=
+Stateful=
+Pass-through=
+OOP SGL In SGL Out  =
+OOP SGL In LB  Out  =
+OOP LB  In SGL Out  =
+Deflate =
+LZS =
+Adler32 =
+Crc32   =
+Adler32&Crc32   =
+Fixed   =
+Dynamic =
diff --git a/doc/guides/compressdevs/overview.rst 
b/doc/guides/compressdevs/overview.rst
index d01c1a966..6d12c3bd6 100644
--- a/doc/guides/compressdevs/overview.rst
+++ b/doc/guides/compressdevs/overview.rst
@@ -16,3 +16,17 @@ Supported Feature Flags
- "Pass-through" feature flag refers to the ability of the PMD
  to let input buffers pass-through it, copying the input to the output,
  without making any modifications to it (no compression done).
+
+   - "OOP SGL In SGL Out" feature flag stands for
+ "Out-of-place Scatter-gather list Input, Scatter-gater list Output",
+ which means that the input and output buffers can consist of multiple 
segments.
+
+   - "OOP SGL In LB Out" feature flag stands for
+ "Out-of-place Scatter-gather list Input, Flat Buffers Output",
+ which means that the input buffer can consist of multiple segments 
combined
+ with a single segment buffer in the output.
+
+   - "OOP LB In SGL Out" feature flag stands for
+ "Out-of-place Flat Buffers Input, Scatter-gather list Output",
+ which means that the output buffer can consist of multiple segments 
combined
+ with a single segment buffer in the input.
diff --git a/doc/guides/rel_notes/release_18_08.rst 
b/doc/guides/rel_notes/release_18_08.rst
index bc0124295..3487e3fb9 100644
--- a/doc/guides/rel_notes/release_18_08.rst
+++ b/doc/guides/rel_notes/release_18_08.rst
@@ -60,6 +60,12 @@ API Changes
Also, make sure to start the actual text at the margin.
=
 
+* compressdev: Feature flag ``RTE_COMP_FF_MBUF_SCATTER_GATHER`` is
+  replaced with the following more explicit flags:
+  - ``RTE_COMP_FF_OOP_SGL_IN_SGL_OUT``
+  - ``RTE_COMP_FF_OOP_SGL_IN_LB_OUT``
+  - ``RTE_COMP_FF_OOP_LB_IN_SGL_OUT``
+
 
 ABI Changes
 ---
diff --git a/lib/librte_compressdev/rte_comp.c 
b/lib/librte_compressdev/rte_comp.c
index d596ba872..97ea0d922 100644
--- a/lib/librte_compressdev/rte_comp.c
+++ b/lib/librte_compressdev/rte_comp.c
@@ -14,8 +14,12 @@ rte_comp_get_feature_name(uint64_t flag)
return "STATEFUL_COMPRESSION";
case RTE_COMP_FF_STATEFUL_DECOMPRESSION:
return "STATEFUL_DECOMPRESSION";
-   case RTE_COMP_FF_MBUF_SCATTER_GATHER:
-   return "MBUF_SCATTER_GATHER";
+   case RTE_COMP_FF_OOP_SGL_IN_SGL_OUT:
+   return "OOP_SGL_IN_SGL_OUT";
+   case RTE_COMP_FF_OOP_SGL_IN_LB_OUT:
+   return "OOP_SGL_IN_LB_OUT";
+   case RTE_COMP_FF_OOP_LB_IN_SGL_OUT:
+   return "OOP_LB_IN_SGL_OUT";
case RTE_COMP_FF_MULTI_PKT_CHECKSUM:
return "MULTI_PKT_CHECKSUM";
case RTE_COMP_FF_ADLER32_CHECKSUM:
diff --git a/lib/librte_compressdev/rte_comp.h 
b/lib/librte_compressdev/rte_comp.h
index 5b513c77e..274b5eadf 100644
--- a/lib/lib

Re: [dpdk-dev] [PATCH v2] librte_lpm: Improve performance of the delete and add functions

2018-07-06 Thread Alex Kiselev
Hi Bruce.

It's test #1 that is giving you the error message,
and I don't see anything wrong here.
The test is trying to create an LPM with the name "LPM1", which is already
in use by a previously created LPM, so it writes an error message to the log:

  LPM rules mempool allocation failed: File exists (17)

It's strange though that you don't get the right message.
Instead of "File exists" you get "Unknown error 17".
 

/* rte_lpm6_create: lpm name == LPM1 */
lpm1 = rte_lpm6_create("LPM1", SOCKET_ID_ANY, &config);
TEST_LPM_ASSERT(lpm1 != NULL);

/* rte_lpm6_create: lpm name == LPM2 */
lpm2 = rte_lpm6_create("LPM2", SOCKET_ID_ANY, &config);
TEST_LPM_ASSERT(lpm2 != NULL);

/* rte_lpm6_create: lpm name == LPM2 */
lpm3 = rte_lpm6_create("LPM1", SOCKET_ID_ANY, &config);
TEST_LPM_ASSERT(lpm3 == NULL);


> On Mon, Jul 02, 2018 at 07:42:11PM +0300, Alex Kiselev wrote:
>> There are two major problems with the library:
>> first, the whole LPM tree is rebuilt when a rule is
>> deleted, even though there is no need to, and second,
>> due to the current rules algorithm with complexity
>> O(n) it's almost impossible to deal with large rule
>> sets (50k or so rules).
>> This patch addresses those two issues.

>> Signed-off-by: Alex Kiselev 
>> ---
>>  lib/librte_lpm/rte_lpm6.c | 1073 
>> ++---
>>  1 file changed, 816 insertions(+), 257 deletions(-)

> The lpm6_autotest is now giving me an error when I run it, which wasn't
> there before, though interestingly the test is still passing overall, which
> seems wrong:

> RTE>>lpm6_autotest
> # test 00
> # test 01
> LPM: LPM rules mempool allocation failed: Unknown error 17 (17)# test 02
> # test 03
> ...

> On the other hand, the performance numbers, especially for delete, look far
> better:

> Before:
> Average LPM Add: 531220 cycles
> Average LPM Lookup: 41.7 cycles (fails = 0.0%)
> BULK LPM Lookup: 33.8 cycles (fails = 0.0%)
> Average LPM Delete: 1.41825e+08 cycles

> After:
> Average LPM Add: 487116 cycles
> Average LPM Lookup: 41.7 cycles (fails = 0.0%)
> BULK LPM Lookup: 33.3 cycles (fails = 0.0%)
> Average LPM Delete: 3.65125e+06 cycles

> /Bruce



-- 
Alex



Re: [dpdk-dev] [PATCH v4 2/4] doc: rename compress feature flag

2018-07-06 Thread Verma, Shally



>-Original Message-
>From: Pablo de Lara [mailto:pablo.de.lara.gua...@intel.com]
>Sent: 06 July 2018 08:24
>To: Verma, Shally ; Gupta, Ashish 
>; fiona.tr...@intel.com;
>lee.d...@intel.com
>Cc: dev@dpdk.org; Pablo de Lara 
>Subject: [PATCH v4 2/4] doc: rename compress feature flag
>
>Renamed feature "Bypass" to "Pass-through",
>as it is a more explicit name, meaning that the PMD
>is capable of passing the mbufs through it,
>without making any modifications (i.e. the NULL algorithm).
>
>Signed-off-by: Pablo de Lara 
>Acked-by: Fiona Trahe 
>---
//snip
Acked-by: Shally Verma 



Re: [dpdk-dev] [PATCH v3 03/16] bus/dpaa: fix the buffer offset setting in FMAN

2018-07-06 Thread Shreyansh Jain

On Friday 06 July 2018 01:40 PM, Hemant Agrawal wrote:

The buffer offset was incorrectly being set at 64,
thus not honoring the packet headroom.

Fixes: 6d6b4f49a155 ("bus/dpaa: add FMAN hardware operations")
Cc: sta...@dpdk.org

Signed-off-by: Hemant Agrawal 
---


Acked-by: Shreyansh Jain 


Re: [dpdk-dev] [PATCH v4 3/4] compressdev: replace mbuf scatter gather flag

2018-07-06 Thread Verma, Shally
Hi Pablo

Looks fine. Just minor comments:

>-Original Message-
>From: Pablo de Lara [mailto:pablo.de.lara.gua...@intel.com]
>Sent: 06 July 2018 08:24
>To: Verma, Shally ; Gupta, Ashish 
>; fiona.tr...@intel.com;
>lee.d...@intel.com
>Cc: dev@dpdk.org; Pablo de Lara 
>Subject: [PATCH v4 3/4] compressdev: replace mbuf scatter gather flag
>
...

>@@ -6,19 +6,21 @@
>diff --git a/doc/guides/compressdevs/overview.rst 
>b/doc/guides/compressdevs/overview.rst
>index d01c1a966..6d12c3bd6 100644
>--- a/doc/guides/compressdevs/overview.rst
>+++ b/doc/guides/compressdevs/overview.rst
>@@ -16,3 +16,17 @@ Supported Feature Flags
>- "Pass-through" feature flag refers to the ability of the PMD
>  to let input buffers pass-through it, copying the input to the output,
>  without making any modifications to it (no compression done).
>+
>+   - "OOP SGL In SGL Out" feature flag stands for
>+ "Out-of-place Scatter-gather list Input, Scatter-gater list Output",
>+ which means that the input and output buffers can consist of multiple 
>segments.
>+
Maybe it's simpler to rephrase it, somewhat along these lines:

Which means "the PMD supports different scatter-gather styled input and
output buffers, i.e. both can consist of multiple segments".

>+   - "OOP SGL In LB Out" feature flag stands for
>+ "Out-of-place Scatter-gather list Input, Flat Buffers Output",
>+ which means that the input buffer can consist of multiple segments
>+ combined with a single segment buffer in the output.

Which means "PMD support input from scatter-gathered styled buffers , but can 
output to linear buffers.
And better to replace "flat" by "linear" to be consistent with name.

>+
>+   - "OOP LB In SGL Out" feature flag stands for
>+ "Out-of-place Flat Buffers Input, Scatter-gather list Output",
>+ which means that the output buffer can consist of multiple segments
>+ combined with a single segment buffer in the input.

Thanks
Shally


[dpdk-dev] [RFC 00/11] Support externally allocated memory in DPDK

2018-07-06 Thread Anatoly Burakov
This is a proposal to enable using externally allocated memory
in DPDK.

In a nutshell, here is what is being done here:

- Index malloc heaps by NUMA node index, rather than NUMA node itself
- Add identifier string to malloc heap, to uniquely identify it
- Allow creating named heaps and add/remove memory to/from those heaps
- Allocate memseg lists at runtime, to keep track of IOVA addresses
  of externally allocated memory
  - If IOVA addresses aren't provided, use RTE_BAD_IOVA
- Allow malloc and memzones to allocate from named heaps

The responsibility to ensure memory is accessible before using it is
on the shoulders of the user - there is no checking done with regard
to the validity of the memory (nor could there be...).

The following limitations are present:

- No multiprocess support
- No thread safety

There is currently no way to allocate memory during the initialization
stage, so even if multiprocess support is added, it is not guaranteed
to work, because of underlying issues with mapping fbarrays in
secondary processes. This is not an issue in a single-process scenario,
but it may be an issue in a multiprocess scenario, in cases where the
primary doesn't intend to share the externally allocated memory, yet
adding such memory could fail because some other process failed to
attach to this shared memory even though it wasn't needed.

Anatoly Burakov (11):
  mem: allow memseg lists to be marked as external
  eal: add function to retrieve socket index by socket ID
  malloc: index heaps using heap ID rather than NUMA node
  malloc: add name to malloc heaps
  malloc: enable retrieving statistics from named heaps
  malloc: enable allocating from named heaps
  malloc: enable creating new malloc heaps
  malloc: allow adding memory to named heaps
  malloc: allow removing memory from named heaps
  malloc: allow destroying heaps
  memzone: enable reserving memory from named heaps

 config/common_base|   1 +
 lib/librte_eal/common/eal_common_lcore.c  |  15 +
 lib/librte_eal/common/eal_common_memory.c |  51 +++-
 lib/librte_eal/common/eal_common_memzone.c| 283 ++
 .../common/include/rte_eal_memconfig.h|   5 +-
 lib/librte_eal/common/include/rte_lcore.h |  19 +-
 lib/librte_eal/common/include/rte_malloc.h| 158 +-
 .../common/include/rte_malloc_heap.h  |   2 +
 lib/librte_eal/common/include/rte_memzone.h   | 183 +++
 lib/librte_eal/common/malloc_heap.c   | 277 +++--
 lib/librte_eal/common/malloc_heap.h   |  26 ++
 lib/librte_eal/common/rte_malloc.c| 197 +++-
 lib/librte_eal/rte_eal_version.map|  10 +
 13 files changed, 1118 insertions(+), 109 deletions(-)

-- 
2.17.1
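
Pieced together from the prototypes in this series, the intended usage flow
would look roughly as follows; the heap name, region size, page size and
error handling are illustrative assumptions:

#include <rte_malloc.h>

#define EXT_HEAP "ext_heap"
#define EXT_LEN (16u << 20)	/* 16 MB region allocated outside DPDK */
#define EXT_PGSZ (2u << 20)	/* its page size */

static int
use_external_memory(void *ext_mem)
{
	void *obj;

	/* create an empty named heap... */
	if (rte_malloc_heap_create(EXT_HEAP) < 0)
		return -1;

	/* ...and hand it the external region; with no IOVA table,
	 * addresses are recorded as RTE_BAD_IOVA
	 */
	if (rte_malloc_heap_add_memory(EXT_HEAP, ext_mem, EXT_LEN,
			NULL, 0, EXT_PGSZ) < 0)
		return -1;

	/* allocations are now simply redirected to the named heap */
	obj = rte_malloc_from_heap(EXT_HEAP, NULL, 4096, 0);
	if (obj == NULL)
		return -1;
	rte_free(obj);

	/* all elements are free again, so the region can be removed */
	return rte_malloc_heap_remove_memory(EXT_HEAP, ext_mem, EXT_LEN);
}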


[dpdk-dev] [RFC 02/11] eal: add function to retrieve socket index by socket ID

2018-07-06 Thread Anatoly Burakov
We are preparing to switch to indexing heaps by heap index
rather than by NUMA node. The first few indexes will be equal to
NUMA node indexes. For example, currently on a machine with
NUMA nodes [0, 8], heaps 0 and 8 will be active, while we want
to make it so that heaps 0 and 1 are active. However, we currently
don't have a function to map a specific NUMA node to a node
index, so add one in this patch.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/eal_common_lcore.c  | 15 +++
 lib/librte_eal/common/include/rte_lcore.h | 19 ++-
 lib/librte_eal/rte_eal_version.map|  1 +
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_lcore.c 
b/lib/librte_eal/common/eal_common_lcore.c
index 3167e9d79..579f5a0a1 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -132,3 +132,18 @@ rte_socket_id_by_idx(unsigned int idx)
}
return config->numa_nodes[idx];
 }
+
+int __rte_experimental
+rte_socket_idx_by_id(unsigned int socket)
+{
+   const struct rte_config *config = rte_eal_get_configuration();
+   int i;
+
+   for (i = 0; i < (int) config->numa_node_count; i++) {
+   unsigned int cur_socket = config->numa_nodes[i];
+   if (cur_socket == socket)
+   return i;
+   }
+   rte_errno = EINVAL;
+   return -1;
+}
diff --git a/lib/librte_eal/common/include/rte_lcore.h 
b/lib/librte_eal/common/include/rte_lcore.h
index 6e09d9181..f58cda09a 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -156,11 +156,28 @@ rte_socket_count(void);
  *
  * @return
  *   - physical socket id as recognized by EAL
- *   - -1 on error, with errno set to EINVAL
+ *   - -1 on error, with rte_errno set to EINVAL
  */
 int __rte_experimental
 rte_socket_id_by_idx(unsigned int idx);
 
+/**
+ * Return index for a particular socket id.
+ *
+ * This will return position in list of all detected physical socket id's for a
+ * given socket. For example, on a machine with sockets [0, 8], passing
+ * 8 as a parameter will return 1.
+ *
+ * @param socket
+ *   physical socket id to return index for
+ *
+ * @return
+ *   - index of physical socket id as recognized by EAL
+ *   - -1 on error, with rte_errno set to EINVAL
+ */
+int __rte_experimental
+rte_socket_idx_by_id(unsigned int socket);
+
 /**
  * Get the ID of the physical socket of the specified lcore
  *
diff --git a/lib/librte_eal/rte_eal_version.map 
b/lib/librte_eal/rte_eal_version.map
index f7dd0e7bc..e7fb37b2a 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -296,6 +296,7 @@ EXPERIMENTAL {
rte_mp_sendmsg;
rte_socket_count;
rte_socket_id_by_idx;
+   rte_socket_idx_by_id;
rte_vfio_dma_map;
rte_vfio_dma_unmap;
rte_vfio_get_container_fd;
-- 
2.17.1
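
Usage is simply the inverse of rte_socket_id_by_idx(); for the [0, 8]
machine from the commit message (illustrative):

int idx = rte_socket_idx_by_id(8);	/* returns 1 */
int id  = rte_socket_id_by_idx(1);	/* returns 8 */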


[dpdk-dev] [RFC 09/11] malloc: allow removing memory from named heaps

2018-07-06 Thread Anatoly Burakov
Add an API to remove memory from specified heaps. This will first
check that all elements within the region are free, and that the
region is the original region that was added to the heap (by
comparing its length to the length of memory addressed by the
underlying memseg list).

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc.h | 28 ++
 lib/librte_eal/common/malloc_heap.c| 61 ++
 lib/librte_eal/common/malloc_heap.h|  4 ++
 lib/librte_eal/common/rte_malloc.c | 28 ++
 lib/librte_eal/rte_eal_version.map |  1 +
 5 files changed, 122 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_malloc.h 
b/lib/librte_eal/common/include/rte_malloc.h
index 5f933993b..25d8d3f11 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -318,6 +318,34 @@ int __rte_experimental
 rte_malloc_heap_add_memory(const char *heap_name, void *va_addr, size_t len,
rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
 
+/**
+ * Remove memory area from heap with specified name.
+ *
+ * @note Concurrently adding or removing memory from different heaps is not
+ *   safe.
+ *
+ * @note This function does not need to be called in multiple processes, as
+ *   multiprocess synchronization will happen automatically as far as heap data
+ *   is concerned. However, before accessing pointers to memory in this heap,
+ *   it is the responsibility of the user to ensure that the heap memory is
+ *   accessible in all processes.
+ *
+ * @note Memory area must be empty to allow its removal from the heap.
+ *
+ * @param heap_name
+ *   Name of the heap to remove memory from.
+ * @param va_addr
+ *   Virtual address to remove from the heap.
+ * @param len
+ *   Length of virtual area to remove from the heap.
+ *
+ * @return
+ *   - 0 on success.
+ *   - -1 on error.
+ */
+int __rte_experimental
+rte_malloc_heap_remove_memory(const char *heap_name, void *va_addr,
+   size_t len);
+
 /**
  * If malloc debug is enabled, check a memory block for header
  * and trailer markers to indicate that all is well with the block.
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index 29446cac9..27dbf6e60 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -892,6 +892,44 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f)
rte_spinlock_unlock(&heap->lock);
 }
 
+static int
+destroy_seg(struct malloc_elem *elem, size_t len)
+{
+   struct malloc_heap *heap = elem->heap;
+   struct rte_memseg_list *msl;
+
+   /* check if the element is unused */
+   if (elem->state != ELEM_FREE) {
+   rte_errno = EBUSY;
+   return -1;
+   }
+
+   msl = elem->msl;
+
+   /* check if element encompasses the entire memseg list */
+   if (elem->size != len || len != (msl->page_sz * msl->memseg_arr.len)) {
+   rte_errno = EINVAL;
+   return -1;
+   }
+
+   /* destroy the fbarray backing this memory */
+   if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
+   return -1;
+
+   /* reset the memseg list */
+   memset(msl, 0, sizeof(*msl));
+
+   /* this element can be removed */
+   malloc_elem_free_list_remove(elem);
+   malloc_elem_hide_region(elem, elem, len);
+
+   memset(elem, 0, sizeof(*elem));
+
+   heap->total_size -= len;
+
+   return 0;
+}
+
 int
 malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
@@ -962,6 +1000,29 @@ malloc_heap_add_external_memory(struct malloc_heap *heap, 
void *va_addr,
return 0;
 }
 
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+   size_t len)
+{
+   struct malloc_elem *elem = heap->first;
+
+   /* find element with specified va address */
+   while (elem != NULL && elem != va_addr) {
+   elem = elem->next;
+   /* stop if we've blown past our VA */
+   if (elem > (struct malloc_elem *)va_addr) {
+   elem = NULL;
+   break;
+   }
+   }
+   /* check if element was found */
+   if (elem == NULL) {
+   rte_errno = EINVAL;
+   return -1;
+   }
+   return destroy_seg(elem, len);
+}
+
 int
 malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
 {
diff --git a/lib/librte_eal/common/malloc_heap.h 
b/lib/librte_eal/common/malloc_heap.h
index 3be4656d0..000146365 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -42,6 +42,10 @@ int
 malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
 
+int
+malloc_heap_remove_external_memory(struct mal

[dpdk-dev] [RFC 01/11] mem: allow memseg lists to be marked as external

2018-07-06 Thread Anatoly Burakov
When we allocate and use DPDK memory, we need to be able to
differentiate between DPDK hugepage segments and segments that
were made part of DPDK but are externally allocated. Add such
a property to memseg lists.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/eal_common_memory.c | 51 ---
 .../common/include/rte_eal_memconfig.h|  1 +
 lib/librte_eal/common/malloc_heap.c   |  2 +-
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memory.c 
b/lib/librte_eal/common/eal_common_memory.c
index 4f0688f9d..835bbffb6 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -24,6 +24,21 @@
 #include "eal_private.h"
 #include "eal_internal_cfg.h"
 
+/* forward declarations for memseg walk functions. we support external segments,
+ * but for some functionality to work, we need to either skip or not skip
+ * external segments. for example, while we expect for virt2memseg to return a
+ * valid memseg even though it's an external memseg, for regular memseg walk we
+ * want to skip those because the expectation is that we will only walk the
+ * DPDK allocated memory.
+ */
+static int
+memseg_list_walk(rte_memseg_list_walk_t func, void *arg, bool skip_external);
+static int
+memseg_walk(rte_memseg_walk_t func, void *arg, bool skip_external);
+static int
+memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg,
+   bool skip_external);
+
 /*
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
@@ -621,9 +636,9 @@ rte_mem_iova2virt(rte_iova_t iova)
 * as we know they are PA-contiguous as well
 */
if (internal_config.legacy_mem)
-   rte_memseg_contig_walk(find_virt_legacy, &vi);
+   memseg_contig_walk(find_virt_legacy, &vi, false);
else
-   rte_memseg_walk(find_virt, &vi);
+   memseg_walk(find_virt, &vi, false);
 
return vi.virt;
 }
@@ -787,8 +802,8 @@ rte_mem_lock_page(const void *virt)
return mlock((void *)aligned, page_size);
 }
 
-int __rte_experimental
-rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+static int
+memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg, bool skip_external)
 {
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ms_idx, ret = 0;
@@ -803,6 +818,8 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void 
*arg)
 
if (msl->memseg_arr.count == 0)
continue;
+   if (skip_external && msl->external)
+   continue;
 
arr = &msl->memseg_arr;
 
@@ -837,7 +854,13 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void 
*arg)
 }
 
 int __rte_experimental
-rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+{
+   return memseg_contig_walk(func, arg, true);
+}
+
+static int
+memseg_walk(rte_memseg_walk_t func, void *arg, bool skip_external)
 {
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ms_idx, ret = 0;
@@ -852,6 +875,8 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
 
if (msl->memseg_arr.count == 0)
continue;
+   if (skip_external && msl->external)
+   continue;
 
arr = &msl->memseg_arr;
 
@@ -875,7 +900,13 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
 }
 
 int __rte_experimental
-rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+{
+   return memseg_walk(func, arg, true);
+}
+
+static int
+memseg_list_walk(rte_memseg_list_walk_t func, void *arg, bool skip_external)
 {
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ret = 0;
@@ -888,6 +919,8 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
 
if (msl->base_va == NULL)
continue;
+   if (skip_external && msl->external)
+   continue;
 
ret = func(msl, arg);
if (ret < 0) {
@@ -904,6 +937,12 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void 
*arg)
return ret;
 }
 
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+   return memseg_list_walk(func, arg, true);
+}
+
 /* init memory subsystem */
 int
 rte_eal_memory_init(void)
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h 
b/lib/librte_eal/common/include/rte_eal_memconfig.h
index aff0688dd..4e8720ba6 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -30,6 +30,7 @@ struct rte_memseg_list {
uint64_t addr_64;

[dpdk-dev] [RFC 06/11] malloc: enable allocating from named heaps

2018-07-06 Thread Anatoly Burakov
Add a new malloc API to allocate memory from the heap referred to by
a specified name.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc.h | 25 ++
 lib/librte_eal/common/malloc_heap.c|  2 +-
 lib/librte_eal/common/malloc_heap.h|  6 ++
 lib/librte_eal/common/rte_malloc.c | 11 ++
 lib/librte_eal/rte_eal_version.map |  1 +
 5 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/include/rte_malloc.h 
b/lib/librte_eal/common/include/rte_malloc.h
index 7cbcd3184..f1bcd9b65 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -213,6 +213,31 @@ rte_zmalloc_socket(const char *type, size_t size, unsigned 
align, int socket);
 void *
 rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, 
int socket);
 
+/**
+ * This function allocates memory from a specified named heap.
+ *
+ * @param name
+ *   Name of the heap to allocate from.
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+__rte_experimental void *
+rte_malloc_from_heap(const char *heap_name, const char *type, size_t size,
+   unsigned int align);
+
 /**
  * Frees the memory space pointed to by the provided pointer.
  *
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index 8437d33b3..a33acc252 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -494,7 +494,7 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t 
size, int socket,
 }
 
 /* this will try lower page sizes first */
-static void *
+void *
 malloc_heap_alloc_on_heap_id(const char *type, size_t size,
unsigned int heap_id, unsigned int flags, size_t align,
size_t bound, bool contig)
diff --git a/lib/librte_eal/common/malloc_heap.h 
b/lib/librte_eal/common/malloc_heap.h
index 4f3137260..a7e408c99 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -29,6 +29,12 @@ void *
 malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int 
flags,
size_t align, size_t bound, bool contig);
 
+/* allocate from specified heap id */
+void *
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+   unsigned int heap_id, unsigned int flags, size_t align,
+   size_t bound, bool contig);
+
 int
 malloc_heap_find_named_heap_idx(const char *name);
 
diff --git a/lib/librte_eal/common/rte_malloc.c 
b/lib/librte_eal/common/rte_malloc.c
index 2508abdb1..215876a59 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -55,6 +55,17 @@ rte_malloc_socket(const char *type, size_t size, unsigned 
int align,
align == 0 ? 1 : align, 0, false);
 }
 
+void *
+rte_malloc_from_heap(const char *heap_name, const char *type, size_t size,
+   unsigned int align)
+{
+   int heap_idx = malloc_heap_find_named_heap_idx(heap_name);
+   if (heap_idx < 0)
+   return NULL;
+   return malloc_heap_alloc_on_heap_id(type, size, heap_idx, 0,
+   align == 0 ? 1 : align, 0, false);
+}
+
 /*
  * Allocate memory on default heap.
  */
diff --git a/lib/librte_eal/rte_eal_version.map 
b/lib/librte_eal/rte_eal_version.map
index 786df1e39..716a7585d 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -278,6 +278,7 @@ EXPERIMENTAL {
rte_fbarray_set_used;
rte_log_register_type_and_pick_level;
rte_malloc_dump_heaps;
+   rte_malloc_from_heap;
rte_malloc_get_stats_from_heap;
rte_mem_alloc_validator_register;
rte_mem_alloc_validator_unregister;
-- 
2.17.1


[dpdk-dev] [RFC 08/11] malloc: allow adding memory to named heaps

2018-07-06 Thread Anatoly Burakov
Add an API to add externally allocated memory to a malloc heap. The
memory will be stored in memseg lists like regular DPDK memory.
Multiple segments are allowed within a heap. If an IOVA table is
not provided, IOVA addresses are filled in with RTE_BAD_IOVA.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc.h | 44 ++
 lib/librte_eal/common/malloc_heap.c| 70 ++
 lib/librte_eal/common/malloc_heap.h|  4 ++
 lib/librte_eal/common/rte_malloc.c | 39 
 lib/librte_eal/rte_eal_version.map |  1 +
 5 files changed, 158 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_malloc.h 
b/lib/librte_eal/common/include/rte_malloc.h
index fa6de073a..5f933993b 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -274,6 +274,50 @@ rte_free(void *ptr);
 int __rte_experimental
 rte_malloc_heap_create(const char *heap_name);
 
+/**
+ * Add more memory to heap with specified name.
+ *
+ * @note Concurrently adding memory to or removing memory from different heaps
+ *   is not safe.
+ *
+ * @note This function does not need to be called in multiple processes, as
+ *   multiprocess synchronization will happen automatically as far as heap data
+ *   is concerned. However, before accessing pointers to memory in this heap,
+ *   it is the responsibility of the user to ensure that the heap memory is
+ *   accessible in all processes.
+ *
+ * @note Memory must be previously allocated for DPDK to be able to use it as a
+ *   malloc heap. Failing to do so will result in undefined behavior, up to and
+ *   including crashes.
+ *
+ * @note Adding memory to heap may fail in multiple processes scenario, as
+ *   attaching to ``rte_fbarray`` structures may not always be successful in
+ *   secondary processes.
+ *
+ * @param heap_name
+ *   Name of the heap to add memory to.
+ * @param va_addr
+ *   Start of virtual area to add to the heap.
+ * @param len
+ *   Length of virtual area to add to the heap.
+ * @param iova_addrs
+ *   Array of page IOVA addresses corresponding to each page in this memory
+ *   area. Can be NULL, in which case page IOVA addresses will be set to
+ *   RTE_BAD_IOVA.
+ * @param n_pages
+ *   Number of elements in the iova_addrs array. Must be zero if ``iova_addrs``
+ *   is NULL.
+ * @param page_sz
+ *   Page size of the underlying memory.
+ *
+ * @return
+ *   - 0 on success.
+ *   - -1 on error.
+ */
+int __rte_experimental
+rte_malloc_heap_add_memory(const char *heap_name, void *va_addr, size_t len,
+   rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
+
 /**
  * If malloc debug is enabled, check a memory block for header
  * and trailer markers to indicate that all is well with the block.
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index f5d103626..29446cac9 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -892,6 +892,76 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f)
rte_spinlock_unlock(&heap->lock);
 }
 
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
+   rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+   struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+   char fbarray_name[RTE_FBARRAY_NAME_LEN];
+   struct rte_memseg_list *msl = NULL;
+   struct rte_fbarray *arr;
+   size_t seg_len = n_pages * page_sz;
+   unsigned int i;
+
+   /* first, find a free memseg list */
+   for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+   struct rte_memseg_list *tmp = &mcfg->memsegs[i];
+   if (tmp->base_va == NULL) {
+   msl = tmp;
+   break;
+   }
+   }
+   if (msl == NULL) {
+   RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n");
+   rte_errno = ENOSPC;
+   return -1;
+   }
+
+   snprintf(fbarray_name, sizeof(fbarray_name) - 1, "%s_%p",
+   heap->name, va_addr);
+
+   /* create the backing fbarray */
+   if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
+   sizeof(struct rte_memseg)) < 0) {
+   RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n");
+   return -1;
+   }
+   arr = &msl->memseg_arr;
+
+   /* fbarray created, fill it up */
+   for (i = 0; i < n_pages; i++) {
+   struct rte_memseg *ms;
+
+   rte_fbarray_set_used(arr, i);
+   ms = rte_fbarray_get(arr, i);
+   ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
+   ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
+   ms->hugepage_sz = page_sz;
+   ms->len = page_sz;
+   ms->nchannel = rte_memory_get_nchannel();
+  
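
To make the flow concrete, a sketch of registering an externally
allocated region with a named heap. The heap name, the sizes, and the use
of an anonymous hugepage mmap() are illustrative assumptions, not part of
the patch; "ext_heap" is assumed to have been created beforehand with
rte_malloc_heap_create():

    #include <sys/mman.h>
    #include <rte_malloc.h>

    #define EXT_HEAP_LEN  (16u << 20) /* 16 MB */
    #define EXT_HEAP_PGSZ (2u << 20)  /* 2 MB hugepages assumed available */

    static int
    register_external_memory(void)
    {
        void *addr = mmap(NULL, EXT_HEAP_LEN, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (addr == MAP_FAILED)
            return -1;

        /* no IOVA table: pages are marked RTE_BAD_IOVA, which is fine
         * for purely CPU-addressed (non-DMA) use */
        if (rte_malloc_heap_add_memory("ext_heap", addr, EXT_HEAP_LEN,
                NULL, 0, EXT_HEAP_PGSZ) < 0) {
            munmap(addr, EXT_HEAP_LEN);
            return -1;
        }
        return 0;
    }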

[dpdk-dev] [RFC 07/11] malloc: enable creating new malloc heaps

2018-07-06 Thread Anatoly Burakov
Add API to allow creating new malloc heaps. They will be created
with indexes higher than heaps reserved for NUMA sockets, and up to
RTE_MAX_HEAPS.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc.h | 21 ++
 lib/librte_eal/common/malloc_heap.c| 16 
 lib/librte_eal/common/malloc_heap.h|  3 ++
 lib/librte_eal/common/rte_malloc.c | 46 ++
 lib/librte_eal/rte_eal_version.map |  1 +
 5 files changed, 87 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_malloc.h 
b/lib/librte_eal/common/include/rte_malloc.h
index f1bcd9b65..fa6de073a 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -253,6 +253,27 @@ rte_malloc_from_heap(const char *heap_name, const char 
*type, size_t size,
 void
 rte_free(void *ptr);
 
+/**
+ * Creates a new empty malloc heap with a specified name.
+ *
+ * @note Concurrently creating or destroying heaps is not safe.
+ *
+ * @note This function does not need to be called in multiple processes, as
+ *   multiprocess synchronization will happen automatically as far as heap data
+ *   is concerned. However, before accessing pointers to memory in this heap,
+ *   it is the responsibility of the user to ensure that the heap memory is
+ *   accessible in all processes.
+ *
+ * @param heap_name
+ *   Name of the heap to create.
+ *
+ * @return
+ *   - 0 on successful creation.
+ *   - -1 on error.
+ */
+int __rte_experimental
+rte_malloc_heap_create(const char *heap_name);
+
 /**
  * If malloc debug is enabled, check a memory block for header
  * and trailer markers to indicate that all is well with the block.
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index a33acc252..f5d103626 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -892,6 +892,22 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f)
rte_spinlock_unlock(&heap->lock);
 }
 
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
+{
+   /* initialize empty heap */
+   heap->alloc_count = 0;
+   heap->first = NULL;
+   heap->last = NULL;
+   LIST_INIT(heap->free_head);
+   rte_spinlock_init(&heap->lock);
+   heap->total_size = 0;
+
+   /* set up name */
+   strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+   return 0;
+}
+
 int
 rte_eal_malloc_heap_init(void)
 {
diff --git a/lib/librte_eal/common/malloc_heap.h 
b/lib/librte_eal/common/malloc_heap.h
index a7e408c99..aa819ef65 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -35,6 +35,9 @@ malloc_heap_alloc_on_heap_id(const char *type, size_t size,
unsigned int heap_id, unsigned int flags, size_t align,
size_t bound, bool contig);
 
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name);
+
 int
 malloc_heap_find_named_heap_idx(const char *name);
 
diff --git a/lib/librte_eal/common/rte_malloc.c 
b/lib/librte_eal/common/rte_malloc.c
index 215876a59..e000dc5b7 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -272,3 +273,48 @@ rte_malloc_virt2iova(const void *addr)
 
return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
 }
+
+int
+rte_malloc_heap_create(const char *heap_name)
+{
+   struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+   struct malloc_heap *heap = NULL;
+   int i;
+
+   /* heap name must be non-NULL, non-empty and fit the name buffer */
+   if (heap_name == NULL ||
+   strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+   strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+   RTE_HEAP_NAME_MAX_LEN) {
+   rte_errno = EINVAL;
+   return -1;
+   }
+   /* check if there is space in the heap list, or if heap with this name
+* already exists. start from non-socket heaps.
+*/
+   for (i = rte_socket_count(); i < RTE_MAX_HEAPS; i++) {
+   struct malloc_heap *tmp = &mcfg->malloc_heaps[i];
+   /* existing heap */
+   if (strncmp(heap_name, tmp->name,
+   RTE_HEAP_NAME_MAX_LEN) == 0) {
+   RTE_LOG(ERR, EAL, "Heap %s already exists\n",
+   heap_name);
+   rte_errno = EEXIST;
+   return -1;
+   }
+   /* empty heap */
+   if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) {
+   heap = tmp;
+   break;
+   }
+   }
+   if (heap == NULL) {
+   RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n");
+   rte_errno = ENOSPC;
+   return -1;
+   
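
A short sketch of how a caller might consume the error conventions
implemented above (the rte_errno values are those set by this patch; the
helper name is illustrative):

    #include <errno.h>
    #include <rte_errno.h>
    #include <rte_malloc.h>

    static int
    ensure_heap(const char *name)
    {
        if (rte_malloc_heap_create(name) == 0)
            return 0;
        if (rte_errno == EEXIST)
            return 0; /* heap already created - acceptable here */
        /* EINVAL: empty/overlong name; ENOSPC: no free heap slots */
        return -1;
    }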

[dpdk-dev] [RFC 04/11] malloc: add name to malloc heaps

2018-07-06 Thread Anatoly Burakov
We will need to refer to external heaps in some way. While we use
heap IDs internally, the external API needs something more
user-friendly, so we will be using a string to uniquely identify
a heap.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc_heap.h |  2 ++
 lib/librte_eal/common/malloc_heap.c | 13 +
 lib/librte_eal/common/rte_malloc.c  |  1 +
 3 files changed, 16 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h 
b/lib/librte_eal/common/include/rte_malloc_heap.h
index d43fa9097..bd64dff03 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -12,6 +12,7 @@
 
 /* Number of free lists per heap, grouped by size. */
 #define RTE_HEAP_NUM_FREELISTS  13
+#define RTE_HEAP_NAME_MAX_LEN 32
 
 /* dummy definition, for pointers */
 struct malloc_elem;
@@ -27,6 +28,7 @@ struct malloc_heap {
 
unsigned alloc_count;
size_t total_size;
+   char name[RTE_HEAP_NAME_MAX_LEN];
 } __rte_cache_aligned;
 
 #endif /* _RTE_MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index e7e1838b1..8f22c062b 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -848,6 +848,7 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f)
 
rte_spinlock_lock(&heap->lock);
 
+   fprintf(f, "Heap name: %s\n", heap->name);
fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
 
@@ -864,6 +865,18 @@ int
 rte_eal_malloc_heap_init(void)
 {
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+   unsigned int i;
+
+   /* assign names to default DPDK heaps */
+   for (i = 0; i < rte_socket_count(); i++) {
+   struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+   char heap_name[RTE_HEAP_NAME_MAX_LEN];
+   int socket_id = rte_socket_id_by_idx(i);
+
+   snprintf(heap_name, sizeof(heap_name) - 1,
+   "socket_%i", socket_id);
+   strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+   }
 
if (register_mp_requests()) {
RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess 
actions\n");
diff --git a/lib/librte_eal/common/rte_malloc.c 
b/lib/librte_eal/common/rte_malloc.c
index 4387bc494..75d6e0b4d 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -198,6 +198,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char 
*type)
malloc_heap_get_stats(heap, &sock_stats);
 
fprintf(f, "Heap id:%u\n", heap_id);
+   fprintf(f, "\tHeap name:%s\n", heap->name);
fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
fprintf(f, "\tAlloc_size:%zu,\n", 
sock_stats.heap_allocsz_bytes);
-- 
2.17.1


[dpdk-dev] [RFC 10/11] malloc: allow destroying heaps

2018-07-06 Thread Anatoly Burakov
Add an API to destroy a specified heap. Any memory regions still
contained within the heap will be removed first.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc.h | 21 
 lib/librte_eal/common/malloc_heap.c| 29 ++
 lib/librte_eal/common/malloc_heap.h|  3 +++
 lib/librte_eal/common/rte_malloc.c | 27 
 lib/librte_eal/rte_eal_version.map |  1 +
 5 files changed, 81 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_malloc.h 
b/lib/librte_eal/common/include/rte_malloc.h
index 25d8d3f11..239cda2ca 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -346,6 +346,27 @@ rte_malloc_heap_add_memory(const char *heap_name, void 
*va_addr, size_t len,
 int __rte_experimental
 rte_malloc_heap_remove_memory(const char *heap_name, void *va_addr, size_t 
len);
 
+/**
+ * Destroys a previously created malloc heap with specified name.
+ *
+ * @note Concurrently creating or destroying heaps is not thread-safe.
+ *
+ * @note This function does not deallocate the memory backing the heap - it
+ *   only deregisters the memory from DPDK.
+ *
+ * @note This function will return a failure result if not all memory allocated
+ *   from the heap has been freed back to malloc heap.
+ *
+ * @param heap_name
+ *   Name of the heap to destroy.
+ *
+ * @return
+ *   - 0 on successful destruction.
+ *   - -1 on error.
+ */
+int __rte_experimental
+rte_malloc_heap_destroy(const char *heap_name);
+
 /**
  * If malloc debug is enabled, check a memory block for header
  * and trailer markers to indicate that all is well with the block.
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index 27dbf6e60..e447b6412 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -1039,6 +1039,35 @@ malloc_heap_create(struct malloc_heap *heap, const char 
*heap_name)
return 0;
 }
 
+int
+malloc_heap_destroy(struct malloc_heap *heap)
+{
+   struct malloc_elem *elem;
+
+   if (heap->alloc_count != 0) {
+   RTE_LOG(ERR, EAL, "Heap is still in use\n");
+   rte_errno = EBUSY;
+   return -1;
+   }
+   elem = heap->first;
+   while (elem != NULL) {
+   struct malloc_elem *next = elem->next;
+
+   if (destroy_seg(elem, elem->size) < 0)
+   return -1;
+
+   elem = next;
+   }
+   if (heap->total_size != 0)
+   RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n");
+
+   /* we can't memset the entire thing as we're still holding the lock */
+   LIST_INIT(heap->free_head);
+   memset(&heap->name, 0, sizeof(heap->name));
+
+   return 0;
+}
+
 int
 rte_eal_malloc_heap_init(void)
 {
diff --git a/lib/librte_eal/common/malloc_heap.h 
b/lib/librte_eal/common/malloc_heap.h
index 000146365..399c9a6b1 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -38,6 +38,9 @@ malloc_heap_alloc_on_heap_id(const char *type, size_t size,
 int
 malloc_heap_create(struct malloc_heap *heap, const char *heap_name);
 
+int
+malloc_heap_destroy(struct malloc_heap *heap);
+
 int
 malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
diff --git a/lib/librte_eal/common/rte_malloc.c 
b/lib/librte_eal/common/rte_malloc.c
index 8d2eb7250..b6beee7ce 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -385,3 +385,30 @@ rte_malloc_heap_create(const char *heap_name)
return malloc_heap_create(heap, heap_name);
 }
 
+int
+rte_malloc_heap_destroy(const char *heap_name)
+{
+   struct malloc_heap *heap = NULL;
+   int ret;
+
+   if (heap_name == NULL ||
+   strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+   strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+   RTE_HEAP_NAME_MAX_LEN) {
+   rte_errno = EINVAL;
+   return -1;
+   }
+   /* start from non-socket heaps */
+   heap = malloc_heap_find_named_heap(heap_name);
+   if (heap == NULL) {
+   RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name);
+   rte_errno = ENOENT;
+   return -1;
+   }
+   /* sanity checks done, now we can destroy the heap */
+   rte_spinlock_lock(&heap->lock);
+   ret = malloc_heap_destroy(heap);
+   rte_spinlock_unlock(&heap->lock);
+
+   return ret;
+}
diff --git a/lib/librte_eal/rte_eal_version.map 
b/lib/librte_eal/rte_eal_version.map
index 7ee79051f..cdde7eb3b 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -282,6 +282,7 @@ EXPERIMENTAL {
rte_malloc_get_stats_from_heap;
rte_malloc_heap_add_memory;
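
A sketch of the corresponding teardown path, using the error codes set by
the code above (the helper name is illustrative):

    #include <errno.h>
    #include <rte_errno.h>
    #include <rte_malloc.h>

    static int
    teardown_heap(const char *name)
    {
        if (rte_malloc_heap_destroy(name) == 0)
            return 0;
        if (rte_errno == EBUSY)
            /* allocations still outstanding - free them, then retry */
            return 1;
        /* ENOENT: no such heap; EINVAL: malformed heap name */
        return -1;
    }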
   

[dpdk-dev] [RFC 05/11] malloc: enable retrieving statistics from named heaps

2018-07-06 Thread Anatoly Burakov
Add internal functions to look up a heap by name, and enable
retrieving statistics for a specified named heap.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/include/rte_malloc.h | 19 +++--
 lib/librte_eal/common/malloc_heap.c| 31 ++
 lib/librte_eal/common/malloc_heap.h|  6 +
 lib/librte_eal/common/rte_malloc.c | 17 
 lib/librte_eal/rte_eal_version.map |  1 +
 5 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_malloc.h 
b/lib/librte_eal/common/include/rte_malloc.h
index a9fb7e452..7cbcd3184 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -256,13 +256,28 @@ rte_malloc_validate(const void *ptr, size_t *size);
  * @param socket_stats
  *   A structure which provides memory to store statistics
  * @return
- *   Null on error
- *   Pointer to structure storing statistics on success
+ *   0 on success
+ *   -1 on error
  */
 int
 rte_malloc_get_socket_stats(int socket,
struct rte_malloc_socket_stats *socket_stats);
 
+/**
+ * Get heap statistics for the specified heap.
+ *
+ * @param heap_name
+ *   Name of the heap to get statistics for
+ * @param socket_stats
+ *   A structure which provides memory to store statistics
+ * @return
+ *   0 on success
+ *   -1 on error
+ */
+int __rte_experimental
+rte_malloc_get_stats_from_heap(const char *heap_name,
+   struct rte_malloc_socket_stats *socket_stats);
+
 /**
  * Dump statistics.
  *
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index 8f22c062b..8437d33b3 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -614,6 +614,37 @@ malloc_heap_free_pages(void *aligned_start, size_t 
aligned_len)
return 0;
 }
 
+int
+malloc_heap_find_named_heap_idx(const char *heap_name)
+{
+   struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+   int heap_idx;
+
+   if (heap_name == NULL)
+   return -1;
+   if (strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == RTE_HEAP_NAME_MAX_LEN)
+   return -1;
+   for (heap_idx = rte_socket_count(); heap_idx < RTE_MAX_HEAPS;
+   heap_idx++) {
+   struct malloc_heap *heap = &mcfg->malloc_heaps[heap_idx];
+   if (strncmp(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN) == 0)
+   return heap_idx;
+   }
+   return -1;
+}
+
+struct malloc_heap *
+malloc_heap_find_named_heap(const char *heap_name)
+{
+   struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+   int heap_idx;
+
+   heap_idx = malloc_heap_find_named_heap_idx(heap_name);
+   if (heap_idx < 0)
+   return NULL;
+   return &mcfg->malloc_heaps[heap_idx];
+}
+
 int
 malloc_heap_free(struct malloc_elem *elem)
 {
diff --git a/lib/librte_eal/common/malloc_heap.h 
b/lib/librte_eal/common/malloc_heap.h
index 03b801415..4f3137260 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -29,6 +29,12 @@ void *
 malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int 
flags,
size_t align, size_t bound, bool contig);
 
+int
+malloc_heap_find_named_heap_idx(const char *name);
+
+struct malloc_heap *
+malloc_heap_find_named_heap(const char *name);
+
 int
 malloc_heap_free(struct malloc_elem *elem);
 
diff --git a/lib/librte_eal/common/rte_malloc.c 
b/lib/librte_eal/common/rte_malloc.c
index 75d6e0b4d..2508abdb1 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -165,6 +165,23 @@ rte_malloc_get_socket_stats(int socket,
socket_stats);
 }
 
+/*
+ * Function to retrieve data for heap on given socket
+ */
+int __rte_experimental
+rte_malloc_get_stats_from_heap(const char *heap_name,
+   struct rte_malloc_socket_stats *socket_stats)
+{
+   struct malloc_heap *heap;
+
+   heap = malloc_heap_find_named_heap(heap_name);
+
+   if (heap == NULL)
+   return -1;
+
+   return malloc_heap_get_stats(heap, socket_stats);
+}
+
 /*
  * Function to dump contents of all heaps
  */
diff --git a/lib/librte_eal/rte_eal_version.map 
b/lib/librte_eal/rte_eal_version.map
index e7fb37b2a..786df1e39 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -278,6 +278,7 @@ EXPERIMENTAL {
rte_fbarray_set_used;
rte_log_register_type_and_pick_level;
rte_malloc_dump_heaps;
+   rte_malloc_get_stats_from_heap;
rte_mem_alloc_validator_register;
rte_mem_alloc_validator_unregister;
rte_mem_event_callback_register;
-- 
2.17.1
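
A minimal sketch of querying the new API; the stats fields used below are
the existing rte_malloc_socket_stats members referenced elsewhere in this
series, and the heap name is illustrative:

    #include <stdio.h>
    #include <rte_malloc.h>

    static void
    print_heap_stats(const char *heap_name)
    {
        struct rte_malloc_socket_stats stats;

        if (rte_malloc_get_stats_from_heap(heap_name, &stats) < 0)
            return; /* heap not found */

        printf("%s: total %zu, free %zu, greatest free block %zu\n",
                heap_name, stats.heap_totalsz_bytes,
                stats.heap_freesz_bytes, stats.greatest_free_size);
    }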


[dpdk-dev] [RFC 03/11] malloc: index heaps using heap ID rather than NUMA node

2018-07-06 Thread Anatoly Burakov
Switch over all parts of EAL to use heap ID instead of NUMA node
ID to identify heaps. Heap ID for DPDK-internal heaps is NUMA
node's index within the detected NUMA node list.

Signed-off-by: Anatoly Burakov 
---
 config/common_base|  1 +
 lib/librte_eal/common/eal_common_memzone.c| 46 ++--
 .../common/include/rte_eal_memconfig.h|  4 +-
 lib/librte_eal/common/malloc_heap.c   | 53 ---
 lib/librte_eal/common/rte_malloc.c| 28 ++
 5 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/config/common_base b/config/common_base
index fcf3a1f6f..b0e3937e0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
 CONFIG_RTE_LIBRTE_EAL=y
 CONFIG_RTE_MAX_LCORE=128
 CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_HEAPS=32
 CONFIG_RTE_MAX_MEMSEG_LISTS=64
 # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
 # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller
diff --git a/lib/librte_eal/common/eal_common_memzone.c 
b/lib/librte_eal/common/eal_common_memzone.c
index faa3b0615..25c56052c 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -52,6 +52,26 @@ memzone_lookup_thread_unsafe(const char *name)
return NULL;
 }
 
+static size_t
+heap_max_free_elem(unsigned int heap_idx, unsigned int align)
+{
+   struct rte_malloc_socket_stats stats;
+   struct rte_mem_config *mcfg;
+   size_t len;
+
+   /* get pointer to global configuration */
+   mcfg = rte_eal_get_configuration()->mem_config;
+
+   malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], &stats);
+
+   len = stats.greatest_free_size;
+
+   if (len < MALLOC_ELEM_OVERHEAD + align)
+   return 0;
+
+   return len - MALLOC_ELEM_OVERHEAD - align;
+}
+
 
 /* This function will return the greatest free block if a heap has been
  * specified. If no heap has been specified, it will return the heap and
@@ -59,29 +79,23 @@ memzone_lookup_thread_unsafe(const char *name)
 static size_t
 find_heap_max_free_elem(int *s, unsigned align)
 {
-   struct rte_mem_config *mcfg;
-   struct rte_malloc_socket_stats stats;
-   int i, socket = *s;
+   unsigned int idx;
+   int socket = *s;
size_t len = 0;
 
-   /* get pointer to global configuration */
-   mcfg = rte_eal_get_configuration()->mem_config;
-
-   for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
-   if ((socket != SOCKET_ID_ANY) && (socket != i))
+   for (idx = 0; idx < rte_socket_count(); idx++) {
+   int cur_socket = rte_socket_id_by_idx(idx);
+   if ((socket != SOCKET_ID_ANY) && (socket != cur_socket))
continue;
 
-   malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
-   if (stats.greatest_free_size > len) {
-   len = stats.greatest_free_size;
-   *s = i;
+   size_t cur_len = heap_max_free_elem(idx, align);
+   if (cur_len > len) {
+   len = cur_len;
+   *s = cur_socket;
}
}
 
-   if (len < MALLOC_ELEM_OVERHEAD + align)
-   return 0;
-
-   return len - MALLOC_ELEM_OVERHEAD - align;
+   return len;
 }
 
 static const struct rte_memzone *
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h 
b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 4e8720ba6..7e03196a6 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -71,8 +71,8 @@ struct rte_mem_config {
 
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for 
objects */
 
-   /* Heaps of Malloc per socket */
-   struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+   /* Heaps of Malloc */
+   struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
 
/* address of mem_config in primary process. used to map shared config 
into
 * exact same address the primary process maps it.
diff --git a/lib/librte_eal/common/malloc_heap.c 
b/lib/librte_eal/common/malloc_heap.c
index 8a1f54905..e7e1838b1 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -93,9 +93,10 @@ malloc_add_seg(const struct rte_memseg_list *msl,
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg_list *found_msl;
struct malloc_heap *heap;
-   int msl_idx;
+   int msl_idx, heap_idx;
 
-   heap = &mcfg->malloc_heaps[msl->socket_id];
+   heap_idx = rte_socket_idx_by_id(msl->socket_id);
+   heap = &mcfg->malloc_heaps[heap_idx];
 
/* msl is const, so find it */
msl_idx = msl - mcfg->memsegs;
@@ -494,14 +495,20 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t 
size, int socket,
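
To illustrate the mapping this patch relies on: heap IDs in the range
[0, rte_socket_count()) belong to DPDK-internal heaps, where a heap's ID
is the NUMA node's index in the detected node list. Below is a naive
sketch of the socket-ID-to-heap-index conversion; the patch itself uses
rte_socket_idx_by_id() for this, so the helper is purely illustrative:

    #include <rte_lcore.h>

    /* illustrative equivalent of rte_socket_idx_by_id() */
    static int
    socket_to_heap_idx(int socket_id)
    {
        unsigned int i;

        for (i = 0; i < rte_socket_count(); i++)
            if (rte_socket_id_by_idx(i) == socket_id)
                return (int)i;
        return -1; /* not a detected NUMA node */
    }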

[dpdk-dev] [RFC 11/11] memzone: enable reserving memory from named heaps

2018-07-06 Thread Anatoly Burakov
Add ability to allocate memory for memzones from named heaps. The
semantics are kept similar to regular allocations, and as much of
the code as possible is shared.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/common/eal_common_memzone.c  | 237 +++-
 lib/librte_eal/common/include/rte_memzone.h | 183 +++
 lib/librte_eal/rte_eal_version.map  |   3 +
 3 files changed, 373 insertions(+), 50 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_memzone.c 
b/lib/librte_eal/common/eal_common_memzone.c
index 25c56052c..d37e7ae1d 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -98,17 +98,14 @@ find_heap_max_free_elem(int *s, unsigned align)
return len;
 }
 
-static const struct rte_memzone *
-memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
-   int socket_id, unsigned int flags, unsigned int align,
+static int
+common_checks(const char *name, size_t len, unsigned int align,
unsigned int bound)
 {
struct rte_memzone *mz;
struct rte_mem_config *mcfg;
struct rte_fbarray *arr;
size_t requested_len;
-   int mz_idx;
-   bool contig;
 
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
@@ -118,14 +115,14 @@ memzone_reserve_aligned_thread_unsafe(const char *name, 
size_t len,
if (arr->count >= arr->len) {
RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
rte_errno = ENOSPC;
-   return NULL;
+   return -1;
}
 
if (strlen(name) > sizeof(mz->name) - 1) {
RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n",
__func__, name);
rte_errno = ENAMETOOLONG;
-   return NULL;
+   return -1;
}
 
/* zone already exist */
@@ -133,7 +130,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, 
size_t len,
RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
__func__, name);
rte_errno = EEXIST;
-   return NULL;
+   return -1;
}
 
/* if alignment is not a power of two */
@@ -141,7 +138,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, 
size_t len,
RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
align);
rte_errno = EINVAL;
-   return NULL;
+   return -1;
}
 
/* alignment less than cache size is not allowed */
@@ -151,7 +148,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, 
size_t len,
/* align length on cache boundary. Check for overflow before doing so */
if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
rte_errno = EINVAL; /* requested size too big */
-   return NULL;
+   return -1;
}
 
len += RTE_CACHE_LINE_MASK;
@@ -163,49 +160,23 @@ memzone_reserve_aligned_thread_unsafe(const char *name, 
size_t len,
/* check that boundary condition is valid */
if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) 
{
rte_errno = EINVAL;
-   return NULL;
-   }
-
-   if ((socket_id != SOCKET_ID_ANY) &&
-   (socket_id >= RTE_MAX_NUMA_NODES || socket_id < 0)) {
-   rte_errno = EINVAL;
-   return NULL;
-   }
-
-   if (!rte_eal_has_hugepages())
-   socket_id = SOCKET_ID_ANY;
-
-   contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
-   /* malloc only cares about size flags, remove contig flag from flags */
-   flags &= ~RTE_MEMZONE_IOVA_CONTIG;
-
-   if (len == 0) {
-   /* len == 0 is only allowed for non-contiguous zones */
-   if (contig) {
-   RTE_LOG(DEBUG, EAL, "Reserving zero-length contiguous memzones is not supported\n");
-   rte_errno = EINVAL;
-   return NULL;
-   }
-   if (bound != 0)
-   requested_len = bound;
-   else {
-   requested_len = find_heap_max_free_elem(&socket_id, 
align);
-   if (requested_len == 0) {
-   rte_errno = ENOMEM;
-   return NULL;
-   }
-   }
-   }
-
-   /* allocate memory on heap */
-   void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags,
-   align, bound, contig);
-   if (mz_addr == NULL) {
-   rte_errno = ENOMEM;
-   return NULL;
+   return -1;
}
+   return 0;
+}
 
+static const struct rte_memzone *
+create_memzone(const char *name, void *mz_addr, size_t requested_len)
+{

[dpdk-dev] [PATCH v1] lib/metrics: add check for invalid metric keys

2018-07-06 Thread Remy Horton
This patch adds a check to rte_metrics_update_values()
that prevents the updating of metrics when presented with
an invalid metric key. Previously, passing an invalid key
could result in a crash.

Fixes: 349950ddb9c5 ("metrics: add information metrics library")

Signed-off-by: Remy Horton 
---
 lib/librte_metrics/rte_metrics.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index 258f058..b5638f5 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -159,6 +159,11 @@ rte_metrics_update_values(int port_id,
stats = memzone->addr;
 
rte_spinlock_lock(&stats->lock);
+
+   if (key >= stats->cnt_stats) {
+   rte_spinlock_unlock(&stats->lock);
+   return -EINVAL;
+   }
idx_metric = key;
cnt_setsize = 1;
while (idx_metric < stats->cnt_stats) {
-- 
2.9.5
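
A small sketch of the caller side: with this fix, an out-of-range key is
rejected with -EINVAL instead of walking past the end of the stats array
(the helper name is illustrative):

    #include <rte_metrics.h>

    static int
    update_one_metric(int port_id, uint16_t key, uint64_t value)
    {
        /* 'key' must come from rte_metrics_reg_name()/reg_names() */
        int ret = rte_metrics_update_value(port_id, key, value);
        if (ret < 0)
            return ret; /* e.g. -EINVAL for an unregistered key */
        return 0;
    }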



[dpdk-dev] [PATCH v5 1/4] doc: cleanup ISA-L PMD feature matrix

2018-07-06 Thread Pablo de Lara
In PMD feature matrices (.ini files), it is not required to
have the list of features that are not supported,
just the ones that are.

Signed-off-by: Pablo de Lara 
Acked-by: Lee Daly 
---

v5:
- Removed "HW Accelerated" from isa-l feature list

v4:
- No change

 doc/guides/compressdevs/features/isal.ini | 9 -
 1 file changed, 9 deletions(-)

diff --git a/doc/guides/compressdevs/features/isal.ini 
b/doc/guides/compressdevs/features/isal.ini
index ad2718df0..7183d1034 100644
--- a/doc/guides/compressdevs/features/isal.ini
+++ b/doc/guides/compressdevs/features/isal.ini
@@ -4,19 +4,10 @@
 ; Supported features of 'ISA-L' compression driver.
 ;
 [Features]
-HW Accelerated =
 CPU SSE= Y
 CPU AVX= Y
 CPU AVX2   = Y
 CPU AVX512 = Y
-CPU NEON   =
-Stateful   =
-By-Pass=
-Chained mbufs  =
 Deflate= Y
-LZS=
-Adler32=
-Crc32  =
-Adler32&Crc32  =
 Fixed  = Y
 Dynamic= Y
-- 
2.14.4



[dpdk-dev] [PATCH v5 4/4] compressdev: add huffman encoding flags

2018-07-06 Thread Pablo de Lara
Added Huffman fixed and dynamic encoding feature flags,
so an application can query if a device supports
these two types when performing DEFLATE compression.

Signed-off-by: Pablo de Lara 
Acked-by: Fiona Trahe 
---

v5-v3:
- No change

v2:
- Fixed typo

 drivers/compress/isal/isal_compress_pmd_ops.c |  4 +++-
 lib/librte_compressdev/rte_comp.c |  4 
 lib/librte_compressdev/rte_comp.h |  4 
 test/test/test_compressdev.c  | 16 
 4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/compress/isal/isal_compress_pmd_ops.c 
b/drivers/compress/isal/isal_compress_pmd_ops.c
index 970a0413b..585f22802 100644
--- a/drivers/compress/isal/isal_compress_pmd_ops.c
+++ b/drivers/compress/isal/isal_compress_pmd_ops.c
@@ -12,7 +12,9 @@
 static const struct rte_compressdev_capabilities isal_pmd_capabilities[] = {
{
.algo = RTE_COMP_ALGO_DEFLATE,
-   .comp_feature_flags =   RTE_COMP_FF_SHAREABLE_PRIV_XFORM,
+   .comp_feature_flags =   RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
+   RTE_COMP_FF_HUFFMAN_FIXED |
+   RTE_COMP_FF_HUFFMAN_DYNAMIC,
.window_size = {
.min = 15,
.max = 15,
diff --git a/lib/librte_compressdev/rte_comp.c 
b/lib/librte_compressdev/rte_comp.c
index 97ea0d922..98ad0cfd9 100644
--- a/lib/librte_compressdev/rte_comp.c
+++ b/lib/librte_compressdev/rte_comp.c
@@ -36,6 +36,10 @@ rte_comp_get_feature_name(uint64_t flag)
return "SHA2_SHA256_HASH";
case RTE_COMP_FF_SHAREABLE_PRIV_XFORM:
return "SHAREABLE_PRIV_XFORM";
+   case RTE_COMP_FF_HUFFMAN_FIXED:
+   return "HUFFMAN_FIXED";
+   case RTE_COMP_FF_HUFFMAN_DYNAMIC:
+   return "HUFFMAN_DYNAMIC";
default:
return NULL;
}
diff --git a/lib/librte_compressdev/rte_comp.h 
b/lib/librte_compressdev/rte_comp.h
index 274b5eadf..1f66945ee 100644
--- a/lib/librte_compressdev/rte_comp.h
+++ b/lib/librte_compressdev/rte_comp.h
@@ -63,6 +63,10 @@ extern "C" {
  * to create as many priv_xforms as it expects to have stateless
  * operations in-flight.
  */
+#define RTE_COMP_FF_HUFFMAN_FIXED  (1ULL << 13)
+/**< Fixed huffman encoding is supported */
+#define RTE_COMP_FF_HUFFMAN_DYNAMIC(1ULL << 14)
+/**< Dynamic huffman encoding is supported */
 
 /** Status of comp operation */
 enum rte_comp_op_status {
diff --git a/test/test/test_compressdev.c b/test/test/test_compressdev.c
index 640942bac..f960963a4 100644
--- a/test/test/test_compressdev.c
+++ b/test/test/test_compressdev.c
@@ -846,6 +846,14 @@ test_compressdev_deflate_stateless_fixed(void)
const char *test_buffer;
uint16_t i;
int ret;
+   const struct rte_compressdev_capabilities *capab;
+
+   capab = rte_compressdev_capability_get(0, RTE_COMP_ALGO_DEFLATE);
+   TEST_ASSERT(capab != NULL, "Failed to retrieve device capabilities");
+
+   if ((capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0)
+   return -ENOTSUP;
+
struct rte_comp_xform *compress_xform =
rte_malloc(NULL, sizeof(struct rte_comp_xform), 0);
 
@@ -905,6 +913,14 @@ test_compressdev_deflate_stateless_dynamic(void)
struct rte_comp_xform *compress_xform =
rte_malloc(NULL, sizeof(struct rte_comp_xform), 0);
 
+   const struct rte_compressdev_capabilities *capab;
+
+   capab = rte_compressdev_capability_get(0, RTE_COMP_ALGO_DEFLATE);
+   TEST_ASSERT(capab != NULL, "Failed to retrieve device capabilities");
+
+   if ((capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0)
+   return -ENOTSUP;
+
if (compress_xform == NULL) {
RTE_LOG(ERR, USER1,
"Compress xform could not be created\n");
-- 
2.14.4
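
With these flags in place, an application can pick the Huffman mode at
runtime. A minimal sketch, mirroring the capability query used in the
test code above (the device ID and fallback policy are illustrative):

    #include <rte_compressdev.h>
    #include <rte_comp.h>

    static enum rte_comp_huffman
    pick_huffman(uint8_t cdev_id)
    {
        const struct rte_compressdev_capabilities *capab;

        capab = rte_compressdev_capability_get(cdev_id,
                RTE_COMP_ALGO_DEFLATE);
        if (capab == NULL)
            return RTE_COMP_HUFFMAN_DEFAULT;

        /* prefer dynamic Huffman (better ratio) when supported */
        if (capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC)
            return RTE_COMP_HUFFMAN_DYNAMIC;
        if (capab->comp_feature_flags & RTE_COMP_FF_HUFFMAN_FIXED)
            return RTE_COMP_HUFFMAN_FIXED;
        return RTE_COMP_HUFFMAN_DEFAULT;
    }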



[dpdk-dev] [PATCH v5 3/4] compressdev: replace mbuf scatter gather flag

2018-07-06 Thread Pablo de Lara
The current mbuf scatter gather feature flag is
too ambiguous, as it is not clear if input and/or output
buffers can be scatter gather mbufs or not.

Therefore, three new flags will replace this flag:
- RTE_COMP_FF_OOP_SGL_IN_SGL_OUT
- RTE_COMP_FF_OOP_SGL_IN_FB_OUT
- RTE_COMP_FF_OOP_LB_IN_SGL_OUT

Note that out-of-place linear buffers are supported by default
and in-place is not supported by the library.

Signed-off-by: Pablo de Lara 
Acked-by: Fiona Trahe 
---

v5:
- Replaced left "Flat Buffer" with "Linear Buffer" (Shally)
- Rephrased comment about new feature flags (Shally)

v4:
- Replaced FB (Flat Buffers) with LB (Linear Buffers) (Shally)
- Add extra explanation on comments about Linear Buffers vs
  Scatter-gather lists

v3:
- Replaced Out-of-place with OOP
- Added new feature flags in default.ini

v2:
- Fixed typos
- Rephrased comments

 doc/guides/compressdevs/features/default.ini | 34 +++-
 doc/guides/compressdevs/overview.rst | 14 
 doc/guides/rel_notes/release_18_08.rst   |  6 +
 lib/librte_compressdev/rte_comp.c|  8 +--
 lib/librte_compressdev/rte_comp.h| 31 +
 5 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/doc/guides/compressdevs/features/default.ini 
b/doc/guides/compressdevs/features/default.ini
index a88414d23..829e4df61 100644
--- a/doc/guides/compressdevs/features/default.ini
+++ b/doc/guides/compressdevs/features/default.ini
@@ -6,19 +6,21 @@
 ; the features table in the documentation.
 ;
 [Features]
-HW Accelerated =
-CPU SSE=
-CPU AVX=
-CPU AVX2   =
-CPU AVX512 =
-CPU NEON   =
-Stateful   =
-Pass-through   =
-Chained mbufs  =
-Deflate=
-LZS=
-Adler32=
-Crc32  =
-Adler32&Crc32  =
-Fixed  =
-Dynamic=
+HW Accelerated  =
+CPU SSE =
+CPU AVX =
+CPU AVX2=
+CPU AVX512  =
+CPU NEON=
+Stateful=
+Pass-through=
+OOP SGL In SGL Out  =
+OOP SGL In LB  Out  =
+OOP LB  In SGL Out  =
+Deflate =
+LZS =
+Adler32 =
+Crc32   =
+Adler32&Crc32   =
+Fixed   =
+Dynamic =
diff --git a/doc/guides/compressdevs/overview.rst 
b/doc/guides/compressdevs/overview.rst
index d01c1a966..70bbe82b7 100644
--- a/doc/guides/compressdevs/overview.rst
+++ b/doc/guides/compressdevs/overview.rst
@@ -16,3 +16,17 @@ Supported Feature Flags
- "Pass-through" feature flag refers to the ability of the PMD
  to let input buffers pass-through it, copying the input to the output,
  without making any modifications to it (no compression done).
+
+   - "OOP SGL In SGL Out" feature flag stands for
+ "Out-of-place Scatter-gather list Input, Scatter-gater list Output",
+ which means PMD supports different scatter-gather styled input and output 
buffers
+ (i.e. both can consists of multiple segments).
+
+   - "OOP SGL In LB Out" feature flag stands for
+ "Out-of-place Scatter-gather list Input, Linear Buffers Output",
+ which means PMD supports input from scatter-gathered styled buffers, 
outputting linear buffers
+ (i.e. single segment).
+
+   - "OOP LB In SGL Out" feature flag stands for
+ "Out-of-place Linear Buffers Input, Scatter-gather list Output",
+ which means PMD supports input from linear buffer, outputting 
scatter-gathered styled buffers.
diff --git a/doc/guides/rel_notes/release_18_08.rst 
b/doc/guides/rel_notes/release_18_08.rst
index bc0124295..3487e3fb9 100644
--- a/doc/guides/rel_notes/release_18_08.rst
+++ b/doc/guides/rel_notes/release_18_08.rst
@@ -60,6 +60,12 @@ API Changes
Also, make sure to start the actual text at the margin.
=
 
+* compressdev: Feature flag ``RTE_COMP_FF_MBUF_SCATTER_GATHER`` is
+  replaced with the following more explicit flags:
+  - ``RTE_COMP_FF_OOP_SGL_IN_SGL_OUT``
+  - ``RTE_COMP_FF_OOP_SGL_IN_LB_OUT``
+  - ``RTE_COMP_FF_OOP_LB_IN_SGL_OUT``
+
 
 ABI Changes
 ---
diff --git a/lib/librte_compressdev/rte_comp.c 
b/lib/librte_compressdev/rte_comp.c
index d596ba872..97ea0d922 100644
--- a/lib/librte_compressdev/rte_comp.c
+++ b/lib/librte_compressdev/rte_comp.c
@@ -14,8 +14,12 @@ rte_comp_get_feature_name(uint64_t flag)
return "STATEFUL_COMPRESSION";
case RTE_COMP_FF_STATEFUL_DECOMPRESSION:
return "STATEFUL_DECOMPRESSION";
-   case RTE_COMP_FF_MBUF_SCATTER_GATHER:
-   return "MBUF_SCATTER_GATHER";
+   case RTE_COMP_FF_OOP_SGL_IN_SGL_OUT:
+   return "OOP_SGL_IN_SGL_OUT";
+   case RTE_COMP_FF_OOP_SGL_IN_LB_OUT:
+   return "OOP_SGL_IN_LB_OUT";
+   case RTE_COMP_FF_OOP_LB_IN_SGL_OUT:
+   return "OOP_LB_IN_SGL_OUT";
case RTE_COMP_FF_MULTI_PKT_CHECKSUM:
return "MULTI_PKT_CHECKSUM";
case RTE_COMP_FF_ADLER
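
A minimal sketch of how an application would consult the new flags before
building multi-segment operations (the device ID is illustrative):

    #include <rte_compressdev.h>
    #include <rte_comp.h>

    /* nonzero if the device accepts a multi-segment source mbuf while
     * writing into a single linear destination mbuf */
    static int
    supports_sgl_in_lb_out(uint8_t cdev_id)
    {
        const struct rte_compressdev_capabilities *capab;

        capab = rte_compressdev_capability_get(cdev_id,
                RTE_COMP_ALGO_DEFLATE);
        if (capab == NULL)
            return 0;
        return (capab->comp_feature_flags &
                RTE_COMP_FF_OOP_SGL_IN_LB_OUT) != 0;
    }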

[dpdk-dev] [PATCH v5 2/4] doc: rename compress feature flag

2018-07-06 Thread Pablo de Lara
Renamed feature "Bypass" to "Pass-through",
as it is a more explicit name, meaning that the PMD
is capable of passing the mbufs through it,
without making any modifications (i.e. NULL algorithm).

Signed-off-by: Pablo de Lara 
Acked-by: Fiona Trahe 
Acked-by: Shally Verma 
---

v5:
- No change

v4:
- Rephrased pass-through feature comment (Shally)

 doc/guides/compressdevs/features/default.ini | 2 +-
 doc/guides/compressdevs/overview.rst | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/guides/compressdevs/features/default.ini 
b/doc/guides/compressdevs/features/default.ini
index 795fc5577..a88414d23 100644
--- a/doc/guides/compressdevs/features/default.ini
+++ b/doc/guides/compressdevs/features/default.ini
@@ -13,7 +13,7 @@ CPU AVX2   =
 CPU AVX512 =
 CPU NEON   =
 Stateful   =
-By-Pass=
+Pass-through   =
 Chained mbufs  =
 Deflate=
 LZS=
diff --git a/doc/guides/compressdevs/overview.rst 
b/doc/guides/compressdevs/overview.rst
index ca37de175..d01c1a966 100644
--- a/doc/guides/compressdevs/overview.rst
+++ b/doc/guides/compressdevs/overview.rst
@@ -10,3 +10,9 @@ Supported Feature Flags
 .. _table_compression_pmd_features:
 
 .. include:: overview_feature_table.txt
+
+.. Note::
+
+   - "Pass-through" feature flag refers to the ability of the PMD
+ to let input buffers pass-through it, copying the input to the output,
+ without making any modifications to it (no compression done).
-- 
2.14.4



[dpdk-dev] [PATCH] cryptodev: rename experimental private data APIs

2018-07-06 Thread Fiona Trahe
The name private_data is confusing in these APIs:
rte_cryptodev_sym_session_set_private_data()
rte_cryptodev_sym_session_get_private_data()
It refers to data added at the end of the session hdr for
use by the application.
The session already contains sess_private_data[index]
which is used to store private pmd data and most references to private
data refer to that.
e.g. external apis
rte_cryptodev_sym_get_private_session_size() and internal
set/get_session_private_data() refer to sess_private_data[].

So rename to user_data, i.e.
rte_cryptodev_sym_session_set_user_data()
rte_cryptodev_sym_session_get_user_data()

Refers to changes introduced here:
https://patches.dpdk.org/patch/38172/

Signed-off-by: Fiona Trahe 
---
 doc/guides/prog_guide/cryptodev_lib.rst| 14 +++---
 doc/guides/prog_guide/event_crypto_adapter.rst |  6 +++---
 doc/guides/rel_notes/release_18_08.rst |  8 
 lib/librte_cryptodev/rte_cryptodev.c   | 16 
 lib/librte_cryptodev/rte_cryptodev.h   | 14 +++---
 lib/librte_cryptodev/rte_cryptodev_version.map |  4 ++--
 lib/librte_eventdev/rte_event_crypto_adapter.c |  4 ++--
 test/test/test_event_crypto_adapter.c  |  8 
 8 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/doc/guides/prog_guide/cryptodev_lib.rst 
b/doc/guides/prog_guide/cryptodev_lib.rst
index 30f0bcf7a..3dbf4dde6 100644
--- a/doc/guides/prog_guide/cryptodev_lib.rst
+++ b/doc/guides/prog_guide/cryptodev_lib.rst
@@ -302,24 +302,24 @@ enqueue call.
 Private data
 
 For session-based operations, the set and get API provides a mechanism for an
-application to store and retrieve the private data information stored along with
-the crypto session.
+application to store and retrieve the private user data information stored along
+with the crypto session.
 
 For example, suppose an application is submitting a crypto operation with a session
-associated and wants to indicate private data information which is required to be
+associated and wants to indicate private user data information which is required to be
 used after completion of the crypto operation. In this case, the application can use
-the set API to set the private data and retrieve it using get API.
+the set API to set the user data and retrieve it using get API.
 
 .. code-block:: c
 
-   int rte_cryptodev_sym_session_set_private_data(
+   int rte_cryptodev_sym_session_set_user_data(
struct rte_cryptodev_sym_session *sess, void *data, uint16_t 
size);
 
-   void * rte_cryptodev_sym_session_get_private_data(
+   void * rte_cryptodev_sym_session_get_user_data(
struct rte_cryptodev_sym_session *sess);
 
 
-For session-less mode, the private data information can be placed along with the
+For session-less mode, the private user data information can be placed along with the
 ``struct rte_crypto_op``. The ``rte_crypto_op::private_data_offset`` indicates the
 start of private data information. The offset is counted from the start of the
 rte_crypto_op including other crypto information such as the IVs (since there can
diff --git a/doc/guides/prog_guide/event_crypto_adapter.rst 
b/doc/guides/prog_guide/event_crypto_adapter.rst
index 5c1354dec..9fe09c805 100644
--- a/doc/guides/prog_guide/event_crypto_adapter.rst
+++ b/doc/guides/prog_guide/event_crypto_adapter.rst
@@ -223,9 +223,9 @@ crypto security session or at an offset in the ``struct 
rte_crypto_op``.
 The ``rte_crypto_op::private_data_offset`` is used to locate the request/
 response in the ``rte_crypto_op``.
 
-For crypto session, ``rte_cryptodev_sym_session_set_private_data()`` API
+For crypto session, ``rte_cryptodev_sym_session_set_user_data()`` API
 will be used to set request/response data. The same data will be obtained
-by ``rte_cryptodev_sym_session_get_private_data()`` API.  The
+by ``rte_cryptodev_sym_session_get_user_data()`` API.  The
 RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA capability indicates
 whether HW or SW supports this feature.
 
@@ -257,7 +257,7 @@ the ``rte_crypto_op``.
 m_data.request_info.cdev_id = cdev_id;
 m_data.request_info.queue_pair_id = qp_id;
 /* Call set API to store private data information */
-rte_cryptodev_sym_session_set_private_data(
+rte_cryptodev_sym_session_set_user_data(
 op->sym->session,
 &m_data,
 sizeof(m_data));
diff --git a/doc/guides/rel_notes/release_18_08.rst 
b/doc/guides/rel_notes/release_18_08.rst
index bc0124295..8f84a088c 100644
--- a/doc/guides/rel_notes/release_18_08.rst
+++ b/doc/guides/rel_notes/release_18_08.rst
@@ -60,6 +60,14 @@ API Changes
Also, make sure to start the actual text at the margin.
=
 
+* **Renamed cryptodev experimental APIs.**
+
+  Used user_data instead of private_data.
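
A sketch of the renamed API in use, matching the signatures shown in the
documentation diff above (the context struct is an illustrative,
application-defined type):

    #include <rte_cryptodev.h>

    struct app_sess_ctx { /* application-defined */
        uint16_t cdev_id;
        uint16_t qp_id;
    };

    static int
    attach_ctx(struct rte_cryptodev_sym_session *sess,
            struct app_sess_ctx *ctx)
    {
        return rte_cryptodev_sym_session_set_user_data(sess, ctx,
                sizeof(*ctx));
    }

    static struct app_sess_ctx *
    fetch_ctx(struct rte_cryptodev_sym_session *sess)
    {
        return rte_cryptodev_sym_session_get_user_data(sess);
    }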

Re: [dpdk-dev] [PATCH v4 2/4] cryptodev: support asymmetric operations

2018-07-06 Thread Trahe, Fiona
Hi Shally, Umesh,

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Shally Verma
> Sent: Tuesday, July 3, 2018 4:24 PM
> To: De Lara Guarch, Pablo 
> Cc: dev@dpdk.org; pathr...@caviumnetworks.com; nmur...@caviumnetworks.com; 
> Umesh Kartha
> ; Sunila Sahu 
> ; Ashish
> Gupta 
> Subject: [dpdk-dev] [PATCH v4 2/4] cryptodev: support asymmetric operations


//snip//
> +
> +int __rte_experimental
> +rte_cryptodev_asym_session_set_private_data(
> + struct rte_cryptodev_asym_session *sess,
> + void *data,
> + uint16_t size)
> +{
> + uint16_t off_set = sizeof(void *) * nb_drivers;
> + uint8_t *private_data_present = (uint8_t *)sess + off_set;
> +
> + if (sess == NULL)
> + return -EINVAL;
> +
> + *private_data_present = 1;
> + off_set += sizeof(uint8_t);
> + rte_memcpy((uint8_t *)sess + off_set, data, size);
> + return 0;
> +}
> +
> +void * __rte_experimental
> +rte_cryptodev_asym_session_get_app_private_data(
> + struct rte_cryptodev_asym_session *sess)
[Fiona] The set api should be renamed if the get function is renamed. 
However, I'd suggest leaving out these functions unless they're really needed 
for asymm.
Are they just here for consistency with the sym functions?
The sym functions are still experimental and I think the names should be 
changed to
use user_data instead of private_data.
I've just sent a patch to the mailing list about this - it would be better to 
resolve that naming
issue first and add corresponding fns later to this api if needed. 


Re: [dpdk-dev] [PATCH v4 2/4] cryptodev: support asymmetric operations

2018-07-06 Thread Verma, Shally
Hi Fiona

>-Original Message-
>From: Trahe, Fiona [mailto:fiona.tr...@intel.com]
>Sent: 06 July 2018 19:11
>To: Verma, Shally ; De Lara Guarch, Pablo 
>
>Cc: dev@dpdk.org; Athreya, Narayana Prasad 
>; Murthy, Nidadavolu
>; Kartha, Umesh ; Sahu, 
>Sunila ; Gupta,
>Ashish ; Trahe, Fiona 
>Subject: RE: [dpdk-dev] [PATCH v4 2/4] cryptodev: support asymmetric operations
>
>External Email
>
>Hi Shally, Umesh,
>
>> -Original Message-
>> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Shally Verma
>> Sent: Tuesday, July 3, 2018 4:24 PM
>> To: De Lara Guarch, Pablo 
>> Cc: dev@dpdk.org; pathr...@caviumnetworks.com; nmur...@caviumnetworks.com; 
>> Umesh Kartha
>> ; Sunila Sahu 
>> ; Ashish
>> Gupta 
>> Subject: [dpdk-dev] [PATCH v4 2/4] cryptodev: support asymmetric operations
>
>
>//snip//
>> +
>> +int __rte_experimental
>> +rte_cryptodev_asym_session_set_private_data(
>> + struct rte_cryptodev_asym_session 
>> *sess,
>> + void *data,
>> + uint16_t size)
>> +{
>> + uint16_t off_set = sizeof(void *) * nb_drivers;
>> + uint8_t *private_data_present = (uint8_t *)sess + off_set;
>> +
>> + if (sess == NULL)
>> + return -EINVAL;
>> +
>> + *private_data_present = 1;
>> + off_set += sizeof(uint8_t);
>> + rte_memcpy((uint8_t *)sess + off_set, data, size);
>> + return 0;
>> +}
>> +
>> +void * __rte_experimental
>> +rte_cryptodev_asym_session_get_app_private_data(
>> + struct rte_cryptodev_asym_session *sess)
>[Fiona] The set api should be renamed if the get function is renamed.
>However, I'd suggest leaving out these functions unless they're really needed 
>for asymm.
>Are they just here for consistency with the sym functions?
>The sym functions are still experimental and I think the names should be 
>changed to
>use user_data instead of private_data.
>I've just sent a patch to the mailing list about this - it would be better to 
>resolve that naming
>issue first and add corresponding fns later to this api if needed.

Ya . right now they were there for consistency. You prefer to remove them?

Thanks
Shally


Re: [dpdk-dev] [PATCH v4 2/4] cryptodev: support asymmetric operations

2018-07-06 Thread Trahe, Fiona
Hi Shally
 
> Ya . right now they were there for consistency. You prefer to remove them?
 
Yes.


[dpdk-dev] [PATCH v9 00/19] enable hotplug on multi-process

2018-07-06 Thread Qi Zhang
v9:
- Move hotplug IPC from rte_eth_dev_attach/rte_eth_dev_detach to
  eal_dev_hotplug_add and eal_dev_hotplug_remove, now all kinds of
  devices will be synced in multi-process.
- Fix couple issue when a device is bound to vfio.
  1) The device can't be detached clearly in a secondary process, which
 also cause it can't be attached again, due to the error that
 /dev/vfio/ is still busy.(see Patch 3/19 and 4/19)
  2) repeat detach/attach device will cause "cannot find TAILQ entry
 for PCI device" due to incorrect PCI address compare.
 (see patch 2/19).
- Removed device lock.
- Removed private device support.
- Fix commit log grammar issue

v8:
- update rte_eal_version.map due to new API added.
- minor reword on release note.
- minor fix on commit log and code style.

NOTE:
  Some issues which are not related to this patchset are expected when
  playing with the hotplug_mp sample, as below.

- Attaching a PCI device twice may cause the device to become
  undetachable; the fix below is required:
  https://patches.dpdk.org/patch/42030/

- ixgbe device can't be detached; the fix below is required
  https://patches.dpdk.org/patch/42031/

v7:
- update rte_ethdev_version.map for new APIs.
- improve code readability in __handle_secondary_request by use goto.
- add comments to explain why need to call rte_eal_alarm_set.
- add error log when process_mp_init_callbacks failed.
- reword release notes base on Anatoly's suggestion.
- add back previous "Acked-by" and "Reviewed-by" in commit log.

  NOTE: the current patchset depends on the IPC fix below, or it may not
  be able to attach a shared vdev.
  https://patches.dpdk.org/patch/41647/

v6:
- remove bus->scan_one, since ABI break is not necessary.
- remove patch for failsafe PMD since it will not support secondary.
- fix wrong implementation on ixgbe.
- add rte_eth_dev_release_port_private into rte_eth_dev_pci_generic_remove for
  secondary process, so we don't need to patch on PMD if PMD use the
  default remove function.
- add release notes update.
- agreed to use strdup(peer) as a workaround for replying to a sync request
  in a separate thread.

v5:
- since we will keep mp thread separate from interrupt thread,
  it is not necessary to use temporary thread, we use rte_eal_alarm_set.
- remove the change in rte_eth_dev_release_port, since there is a better
  way to prevent rte_eth_dev_release_port be called after
  rte_eth_dev_release_port_private.
- fix the issue that lock does not take effect on secondary due to
  previous re-work
- fix the issue when the first attached device is a private device from
  secondary. (patch 8/24)
- workaround for replying to a sync request in a separate thread; this is
  still open and under discussion, as below.
  https://mails.dpdk.org/archives/dev/2018-June/105359.html

v4:
- since the mp thread will be merged into the interrupt thread, the v3 fix
  for the sync IPC deadlock will not work. The new version enables a
  mechanism to invoke an mp action callback in a temporary thread to
  avoid the IPC deadlock. With this, the secondary-to-primary request
  implementation is also simplified, since we can use a sync request
  directly in a separate thread.

v3:
- enable mp init callback registration to help non-EAL modules initialize
  the mp channel during rte_eal_init
- fix attaching a shared device from a secondary process.
  1) deadlock due to sync IPC being invoked in rte_malloc in the primary
 process when handling a secondary request to attach a device; the
 solution is for the primary process to issue shared device attach/detach
 in the interrupt thread.
  2) returned port_id was not correct.
- check nb_sent and nb_received in sync IPC.
- fix memory leak during error handling at attach_on_secondary.
- improve clean_lock_callback to only lock/unlock spinlock once
- improve error code return in check-reply during async IPC.
- remove rte_ prefix of internal function in ethdev_mp.c
- sample code improvement.
  1) rename sample to "hotplug_mp", and move to example/multi-process.
  2) cleanup header include.
  3) call rte_eal_cleanup before exit.

v2:
- rename rte_ethdev_mp.* to ethdev_mp.*
- rename rte_ethdev_lock.* to ethdev_lock.*
- move internal functions to ethdev_private.h
- separate rte_eth_dev_[un]lock into rte_eth_dev_[un]lock and
  rte_eth_dev_[un]lock_with_callback
- lock callbacks will be removed automatically after device is detached.
- add experimental tag for all new APIs.
- fix coding style issue.
- fix wrong license header in sample code.
- fix spelling 
- fix meson.build.
- improve comments. 

Background:
===

Currently a secondary process will only sync ethdev data from the
primary process at init stage; it will not be aware of devices being
attached/detached in the primary process at runtime.

However, there is a requirement from applications that use the
primary-secondary process model: the primary process works as a
resource management process that creates/destroys virtual devices at
runtime, while the secondary processes handle the network traffic on
these devices.

Solution:
=

So the original intention

[dpdk-dev] [PATCH v9 09/19] net/af_packet: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port in a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/af_packet/rte_eth_af_packet.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c 
b/drivers/net/af_packet/rte_eth_af_packet.c
index ea47abbf8..33ac19de8 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -935,6 +935,7 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+   eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -986,6 +987,16 @@ rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -1;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on local process only */
+   if (strlen(rte_vdev_device_args(dev)) == 0)
+   return rte_eth_dev_release_port_private(eth_dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
internals = eth_dev->data->dev_private;
for (q = 0; q < internals->nb_queues; q++) {
rte_free(internals->rx_queue[q].rd);
-- 
2.13.6



[dpdk-dev] [PATCH v9 02/19] bus/pci: fix PCI address compare

2018-07-06 Thread Qi Zhang
When memcmp is used to compare two PCI addresses, the comparison covers
sizeof(struct rte_pci_addr), which is padded to 8 bytes due to 4-byte
alignment, while only 7 bytes of the struct are valid. Comparing the
8th (padding) byte can produce an unexpected result, which happens when
repeatedly attaching/detaching a device.

Fixes: c752998b5e2e ("pci: introduce library and driver")
Cc: sta...@dpdk.org

Signed-off-by: Qi Zhang 
---
 drivers/bus/pci/linux/pci_vfio.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index aeeaa9ed8..dd25c3542 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -43,6 +43,17 @@ static struct rte_tailq_elem rte_vfio_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_vfio_tailq)
 
+/* Compare two PCI addresses */
+static int pci_addr_cmp(struct rte_pci_addr *addr1, struct rte_pci_addr *addr2)
+{
+   if (addr1->domain == addr2->domain &&
+   addr1->bus == addr2->bus &&
+   addr1->devid == addr2->devid &&
+   addr1->function == addr2->function)
+   return 0;
+   return 1;
+}
+
 int
 pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
void *buf, size_t len, off_t offs)
@@ -642,7 +653,7 @@ pci_vfio_unmap_resource(struct rte_pci_device *dev)
vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, 
mapped_pci_res_list);
/* Get vfio_res */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
-   if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+   if (pci_addr_cmp(&vfio_res->pci_addr, &dev->addr))
continue;
break;
}
-- 
2.13.6
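
To illustrate the underlying layout problem: struct rte_pci_addr packs a
32-bit domain plus three 8-bit fields into 7 payload bytes, which the
compiler pads to sizeof == 8, and struct assignment of the fields leaves
that padding byte untouched. A small demonstration sketch (the function
name is illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <rte_pci.h>

    static void
    show_padding_hazard(void)
    {
        struct rte_pci_addr a, b;

        /* 4 (domain) + 1 + 1 + 1 = 7 payload bytes, padded to 8 */
        printf("sizeof(struct rte_pci_addr) = %zu\n", sizeof(a));

        memset(&a, 0x00, sizeof(a));
        memset(&b, 0xff, sizeof(b));
        a.domain = b.domain = 0; a.bus = b.bus = 1;
        a.devid = b.devid = 2; a.function = b.function = 3;

        /* all fields equal, yet memcmp() != 0: the 8th (padding)
         * byte still differs, which is exactly the bug fixed above */
        printf("memcmp: %d\n", memcmp(&a, &b, sizeof(a)));
    }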



[dpdk-dev] [PATCH v9 08/19] net/ixgbe: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detach port on a secondary process will mess primary
process and cause the same device can't be attached back again.
A secondary process should use rte_eth_release_port_private to
release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/ixgbe/ixgbe_ethdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 87d2ad090..161a15f05 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -1792,6 +1792,9 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device 
*pci_dev)
if (!ethdev)
return -ENODEV;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   return rte_eth_dev_release_port_private(ethdev);
+
if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, ixgbe_vf_representor_uninit);
else
-- 
2.13.6



[dpdk-dev] [PATCH v9 04/19] vfio: remove unnecessary IPC for group fd clear

2018-07-06 Thread Qi Zhang
Clearing a vfio_group_fd does not need to involve any IPC.
Moreover, the current IPC implementation for SOCKET_CLR_GROUP is not
correct: rte_vfio_clear_group on a secondary process will always
fail, which prevents devices from being detached correctly on a
secondary process. This patch simply removes all IPC-related code
from rte_vfio_clear_group.

Signed-off-by: Qi Zhang 
---
 lib/librte_eal/linuxapp/eal/eal_vfio.c | 45 +-
 lib/librte_eal/linuxapp/eal/eal_vfio.h |  1 -
 lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c |  8 -
 3 files changed, 8 insertions(+), 46 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index a2bbdfbf4..c0eccddc3 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -575,10 +575,6 @@ int
 rte_vfio_clear_group(int vfio_group_fd)
 {
int i;
-   struct rte_mp_msg mp_req, *mp_rep;
-   struct rte_mp_reply mp_reply;
-   struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
-   struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
struct vfio_config *vfio_cfg;
 
vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
@@ -587,40 +583,15 @@ rte_vfio_clear_group(int vfio_group_fd)
return -1;
}
 
-   if (internal_config.process_type == RTE_PROC_PRIMARY) {
-
-   i = get_vfio_group_idx(vfio_group_fd);
-   if (i < 0)
-   return -1;
-   vfio_cfg->vfio_groups[i].group_num = -1;
-   vfio_cfg->vfio_groups[i].fd = -1;
-   vfio_cfg->vfio_groups[i].devices = 0;
-   vfio_cfg->vfio_active_groups--;
-   return 0;
-   }
-
-   p->req = SOCKET_CLR_GROUP;
-   p->group_num = vfio_group_fd;
-   strcpy(mp_req.name, EAL_VFIO_MP);
-   mp_req.len_param = sizeof(*p);
-   mp_req.num_fds = 0;
-
-   if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
-   mp_reply.nb_received == 1) {
-   mp_rep = &mp_reply.msgs[0];
-   p = (struct vfio_mp_param *)mp_rep->param;
-   if (p->result == SOCKET_OK) {
-   free(mp_reply.msgs);
-   return 0;
-   } else if (p->result == SOCKET_NO_FD)
-   RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
-   else
-   RTE_LOG(ERR, EAL, "  no such VFIO group fd!\n");
-
-   free(mp_reply.msgs);
-   }
+   i = get_vfio_group_idx(vfio_group_fd);
+   if (i < 0)
+   return -1;
+   vfio_cfg->vfio_groups[i].group_num = -1;
+   vfio_cfg->vfio_groups[i].fd = -1;
+   vfio_cfg->vfio_groups[i].devices = 0;
+   vfio_cfg->vfio_active_groups--;
 
-   return -1;
+   return 0;
 }
 
 int
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index e65b10374..68d4750a5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -129,7 +129,6 @@ int vfio_mp_sync_setup(void);
 
 #define SOCKET_REQ_CONTAINER 0x100
 #define SOCKET_REQ_GROUP 0x200
-#define SOCKET_CLR_GROUP 0x300
 #define SOCKET_OK 0x0
 #define SOCKET_NO_FD 0x1
 #define SOCKET_ERR 0xFF
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 9c202bb08..680a24aae 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -55,14 +55,6 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
reply.fds[0] = fd;
}
break;
-   case SOCKET_CLR_GROUP:
-   r->req = SOCKET_CLR_GROUP;
-   r->group_num = m->group_num;
-   if (rte_vfio_clear_group(m->group_num) < 0)
-   r->result = SOCKET_NO_FD;
-   else
-   r->result = SOCKET_OK;
-   break;
case SOCKET_REQ_CONTAINER:
r->req = SOCKET_REQ_CONTAINER;
fd = rte_vfio_get_container_fd();
-- 
2.13.6



[dpdk-dev] [PATCH v9 07/19] net/i40e: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/i40e/i40e_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 13c5d3296..7d1f98422 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -678,6 +678,8 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev)
if (!ethdev)
return -ENODEV;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   return rte_eth_dev_release_port_private(ethdev);
 
if (ethdev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR)
return rte_eth_dev_destroy(ethdev, i40e_vf_representor_uninit);
-- 
2.13.6



[dpdk-dev] [PATCH v9 05/19] eal: enable hotplug on multi-process

2018-07-06 Thread Qi Zhang
This series introduces a solution to handle hotplug in
multi-process; it covers the scenarios below:

1. Attach a device from the primary
2. Detach a device from the primary
3. Attach a device from a secondary
4. Detach a device from a secondary

In the primary-secondary process model, devices are assumed to be
shared by default: attaching or detaching a device on any process is
broadcast to all other processes through the mp channel, so device
information stays synchronized across all processes.

Any failure during the attach/detach process would leave processes in
an inconsistent state, so proper rollback actions have to be taken.

This patch covers the implementation of cases 1 and 2.
Cases 3 and 4 are implemented in a separate patch.

IPC scenario for cases 1 and 2 (a condensed C sketch follows the two lists):

attach a device
a) primary attaches the new device; if that fails, go to h).
b) primary sends an attach sync request to all secondaries.
c) each secondary receives the request, attaches the device and sends a reply.
d) primary checks the replies; if all succeeded, go to i).
e) primary sends an attach rollback sync request to all secondaries.
f) each secondary receives the request, detaches the device and sends a reply.
g) primary receives the replies and detaches the device as rollback action.
h) attach failed.
i) attach succeeded.

detach a device
a) primary sends a detach sync request to all secondaries.
b) each secondary detaches the device and sends a reply.
c) primary checks the replies; if all succeeded, go to f).
d) primary sends a detach rollback sync request to all secondaries.
e) each secondary receives the request and attaches the device back; go to g).
f) primary detaches the device; if that succeeds, go to h), else go to d).
g) detach failed.
h) detach succeeded.
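
For illustration only, a minimal self-contained C sketch of the
primary-side attach flow above; local_attach(), local_detach() and
broadcast() are hypothetical stubs, not the EAL or IPC APIs added by
this series:

#include <stdio.h>

/* Hypothetical stubs standing in for the real bus scan/probe and
 * mp-channel broadcast calls. */
static int local_attach(const char *devargs)
{
        printf("primary: attach %s\n", devargs);
        return 0;
}

static int local_detach(const char *devargs)
{
        printf("primary: detach %s\n", devargs);
        return 0;
}

static int broadcast(const char *op, const char *devargs)
{
        printf("primary -> secondaries: %s %s\n", op, devargs);
        return 0;
}

/* Steps a)-i) of "attach a device", condensed. */
static int attach_on_primary(const char *devargs)
{
        if (local_attach(devargs) != 0)
                return -1;                      /* h) attach failed */
        if (broadcast("attach", devargs) != 0) {
                broadcast("detach", devargs);   /* e)-f) roll back secondaries */
                local_detach(devargs);          /* g) roll back primary */
                return -1;                      /* h) attach failed */
        }
        return 0;                               /* i) attach succeeded */
}

int main(void)
{
        return attach_on_primary("net_null0");
}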

Signed-off-by: Qi Zhang 
---
 lib/librte_eal/bsdapp/eal/Makefile  |   1 +
 lib/librte_eal/common/eal_common_dev.c  | 140 +++-
 lib/librte_eal/common/eal_private.h |  37 +++
 lib/librte_eal/common/hotplug_mp.c  | 181 
 lib/librte_eal/common/hotplug_mp.h  |  44 
 lib/librte_eal/common/include/rte_bus.h |   3 +
 lib/librte_eal/common/include/rte_dev.h |   9 ++
 lib/librte_eal/common/meson.build   |   1 +
 lib/librte_eal/linuxapp/eal/Makefile|   1 +
 lib/librte_eal/linuxapp/eal/eal.c   |   6 ++
 10 files changed, 418 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_eal/common/hotplug_mp.c
 create mode 100644 lib/librte_eal/common/hotplug_mp.h

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 3fd33f1e4..4ecc73b42 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -59,6 +59,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += hotplug_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 14c5f05fa..fb1a122ae 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -16,8 +16,10 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "eal_private.h"
+#include "hotplug_mp.h"
 
 /**
  * The device event callback description.
@@ -102,8 +104,9 @@ int rte_eal_dev_detach(struct rte_device *dev)
return ret;
 }
 
-int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
-   const char *devargs)
+int
+do_dev_hotplug_add(const char *busname, const char *devname,
+   const char *devargs)
 {
struct rte_bus *bus;
struct rte_device *dev;
@@ -168,8 +171,7 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
return ret;
 }
 
-int __rte_experimental
-rte_eal_hotplug_remove(const char *busname, const char *devname)
+int do_dev_hotplug_remove(const char *busname, const char *devname)
 {
struct rte_bus *bus;
struct rte_device *dev;
@@ -197,11 +199,139 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
if (ret)
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
dev->name);
-   rte_devargs_remove(busname, devname);
+   else
+   rte_devargs_remove(busname, devname);
+
return ret;
 }
 
 int __rte_experimental
+rte_eal_hotplug_add(const char *busname, const char *devname,
+   const char *devargs)
+{
+   struct eal_dev_mp_req req;
+   int ret;
+
+   memset(&req, 0, sizeof(req));
+   req.t = EAL_DEV_REQ_TYPE_ATTACH;
+   strlcpy(req.busname, busname, RTE_BUS_NAME_MAX_LEN);
+   strlcpy(req.devname, devname, RTE_DEV_NAME_MAX_LEN);
+   strlcpy(req.devargs, devargs, RTE_DEV_ARGS_MAX_LEN);

[dpdk-dev] [PATCH v9 03/19] bus/pci: enable vfio unmap resource for secondary

2018-07-06 Thread Qi Zhang
The subroutine to unmap VFIO resources is shared by the primary and
secondary processes, but it does not work in a secondary process.
This patch adds a dedicated function to handle the situation where a
device is unmapped in a secondary process.

Signed-off-by: Qi Zhang 
---
 drivers/bus/pci/linux/pci_vfio.c | 75 ++--
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index dd25c3542..72481ac45 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -595,6 +595,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
dev->mem_resource[i].addr = maps[i].addr;
}
 
+   /* save vfio_dev_fd so it can be used during release */
+   dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+
return 0;
 err_vfio_dev_fd:
close(vfio_dev_fd);
@@ -614,8 +617,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
return pci_vfio_map_resource_secondary(dev);
 }
 
-int
-pci_vfio_unmap_resource(struct rte_pci_device *dev)
+static int
+pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
 {
char pci_addr[PATH_MAX] = {0};
struct rte_pci_addr *loc = &dev->addr;
@@ -687,6 +690,74 @@ pci_vfio_unmap_resource(struct rte_pci_device *dev)
return 0;
 }
 
+static int
+pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev)
+{
+   char pci_addr[PATH_MAX] = {0};
+   struct rte_pci_addr *loc = &dev->addr;
+   int i, ret;
+   struct mapped_pci_resource *vfio_res = NULL;
+   struct mapped_pci_res_list *vfio_res_list;
+
+   struct pci_map *maps;
+
+   /* store PCI address string */
+   snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+   loc->domain, loc->bus, loc->devid, loc->function);
+
+   ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
+ dev->intr_handle.vfio_dev_fd);
+   if (ret < 0) {
+   RTE_LOG(ERR, EAL,
+   "%s(): cannot release device\n", __func__);
+   return ret;
+   }
+
+   vfio_res_list =
+   RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+   /* Get vfio_res */
+   TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+   if (pci_addr_cmp(&vfio_res->pci_addr, &dev->addr))
+   continue;
+   break;
+   }
+   /* if we haven't found our tailq entry, something's wrong */
+   if (vfio_res == NULL) {
+   RTE_LOG(ERR, EAL, "  %s cannot find TAILQ entry for PCI device!\n",
+   pci_addr);
+   return -1;
+   }
+
+   /* unmap BARs */
+   maps = vfio_res->maps;
+
+   RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
+   pci_addr);
+   for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+
+   /*
+* We do not need to be aware of MSI-X table BAR mappings as
+* when mapping. Just using current maps array is enough
+*/
+   if (maps[i].addr) {
+   RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s 
at %p\n",
+   pci_addr, maps[i].addr);
+   pci_unmap_resource(maps[i].addr, maps[i].size);
+   }
+   }
+
+   return 0;
+}
+
+int
+pci_vfio_unmap_resource(struct rte_pci_device *dev)
+{
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   return pci_vfio_unmap_resource_primary(dev);
+   else
+   return pci_vfio_unmap_resource_secondary(dev);
+}
+
 int
 pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
-- 
2.13.6



[dpdk-dev] [PATCH v9 01/19] ethdev: add function to release port in local process

2018-07-06 Thread Qi Zhang
Add driver API rte_eth_dev_release_port_private to support the
case where an ethdev needs to be detached on a secondary process.
The local state is set to unused, and shared data is not reset,
so the primary process can still use the port.
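
For context, a compile-only sketch (not part of the patch) of how a
driver's remove path is expected to use the new API, mirroring the PMD
patches later in this series; pmd_remove_sketch is a hypothetical
name, and the device lookup code is elided:

#include <rte_eal.h>
#include <rte_ethdev_driver.h>

static int
pmd_remove_sketch(struct rte_eth_dev *eth_dev)
{
        /* A secondary process only drops its local reference... */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return rte_eth_dev_release_port_private(eth_dev);

        /* ...while the primary performs the full release. */
        return rte_eth_dev_release_port(eth_dev);
}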

Signed-off-by: Qi Zhang 
Reviewed-by: Andrew Rybchenko 
Acked-by: Remy Horton 
---
 lib/librte_ethdev/rte_ethdev.c| 12 
 lib/librte_ethdev/rte_ethdev_driver.h | 16 +++-
 lib/librte_ethdev/rte_ethdev_pci.h|  8 
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index a9977df97..52a97694c 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -359,6 +359,18 @@ rte_eth_dev_attach_secondary(const char *name)
 }
 
 int
+rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev)
+{
+   if (eth_dev == NULL)
+   return -EINVAL;
+
+   _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
+   eth_dev->state = RTE_ETH_DEV_UNUSED;
+
+   return 0;
+}
+
+int
 rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
 {
if (eth_dev == NULL)
diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index c9c825e3f..269586d88 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -62,7 +62,7 @@ struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name);
  * Release the specified ethdev port.
  *
  * @param eth_dev
- * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * Device to be detached.
  * @return
  *   - 0 on success, negative on error
  */
@@ -70,6 +70,20 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
 
 /**
  * @internal
+ * Release the specified ethdev port in the local process.
+ * Only sets the ethdev state to unused; shared data is not reset,
+ * since other processes may still be using it. Typically called
+ * by a secondary process.
+ *
+ * @param eth_dev
+ * Device to be detached.
+ * @return
+ *   - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port_private(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
  * Release device queues and clear its configuration to force the user
  * application to reconfigure it. It is for internal use only.
  *
diff --git a/lib/librte_ethdev/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h
index 2cfd37274..a46d9e182 100644
--- a/lib/librte_ethdev/rte_ethdev_pci.h
+++ b/lib/librte_ethdev/rte_ethdev_pci.h
@@ -197,6 +197,14 @@ rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev,
if (!eth_dev)
return -ENODEV;
 
+   /**
+* PCI device can only be globally detached directly by a
+* primary process. In secondary process, we only need to
+* release port.
+*/
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   return rte_eth_dev_release_port_private(eth_dev);
+
if (dev_uninit) {
ret = dev_uninit(eth_dev);
if (ret)
-- 
2.13.6



[dpdk-dev] [PATCH v9 11/19] net/kni: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/kni/rte_eth_kni.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index ab63ea427..e5679c76a 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -419,6 +419,7 @@ eth_kni_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = ð_kni_ops;
+   eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -463,6 +464,16 @@ eth_kni_remove(struct rte_vdev_device *vdev)
if (eth_dev == NULL)
return -1;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(vdev)) == 0)
+   return rte_eth_dev_release_port_private(eth_dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
eth_kni_dev_stop(eth_dev);
 
internals = eth_dev->data->dev_private;
-- 
2.13.6



[dpdk-dev] [PATCH v9 06/19] eal: support attach or detach shared device from secondary

2018-07-06 Thread Qi Zhang
This patch covers the multi-process hotplug case where a device
attach/detach request is issued from a secondary process; a condensed
C sketch follows the two flows below.

device attach on secondary:
a) secondary sends a sync request to the primary.
b) primary receives the request and attaches the new device; if
   that fails, go to i).
c) primary forwards the attach sync request to all secondaries.
d) each secondary receives the request, attaches the device and sends a reply.
e) primary checks the replies; if all succeeded, go to j).
f) primary sends an attach rollback sync request to all secondaries.
g) each secondary receives the request, detaches the device and sends a reply.
h) primary receives the replies and detaches the device as rollback action.
i) send attach failure to the secondary as a reply to step a); go to k).
j) send attach success to the secondary as a reply to step a).
k) secondary receives the reply and returns.

device detach on secondary:
a) secondary sends a sync request to the primary.
b) primary sends a detach sync request to all secondaries.
c) each secondary detaches the device and sends a reply.
d) primary checks the replies; if all succeeded, go to g).
e) primary sends a detach rollback sync request to all secondaries.
f) each secondary receives the request and attaches the device back; go to h).
g) primary detaches the device; if that succeeds, go to i), else go to e).
h) primary sends detach failure to the secondary as a reply to step a); go to j).
i) primary sends detach success to the secondary as a reply to step a).
j) secondary receives the reply and returns.
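
Again for illustration, a hypothetical self-contained sketch of the
secondary-side entry point (steps a) and j) above); struct dev_req and
request_to_primary() are stand-in stubs, while the real patch uses
struct eal_dev_mp_req and eal_dev_hotplug_request_to_primary():

#include <stdio.h>

/* Hypothetical request type and IPC stub. */
struct dev_req {
        int result;             /* filled in by the primary */
};

static int request_to_primary(struct dev_req *req)
{
        req->result = 0;        /* pretend the primary ran steps b)-i) */
        return 0;               /* 0: request delivered and replied */
}

/* Forward the request over the mp channel, then propagate the
 * primary's verdict as this call's return value. */
static int hotplug_from_secondary(struct dev_req *req)
{
        if (request_to_primary(req) != 0)
                return -1;      /* the IPC itself failed */
        return req->result;
}

int main(void)
{
        struct dev_req req = { .result = -1 };

        return hotplug_from_secondary(&req);
}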

Signed-off-by: Qi Zhang 
---
 lib/librte_eal/common/eal_common_dev.c |  36 ++-
 lib/librte_eal/common/hotplug_mp.c | 175 -
 2 files changed, 202 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index fb1a122ae..195e1fe00 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -218,8 +218,22 @@ rte_eal_hotplug_add(const char *busname, const char *devname,
strlcpy(req.devname, devname, RTE_DEV_NAME_MAX_LEN);
strlcpy(req.devargs, devargs, RTE_DEV_ARGS_MAX_LEN);
 
-   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-   return -ENOTSUP;
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /**
+* If in secondary process, just send IPC request to
+* primary process.
+*/
+   ret = eal_dev_hotplug_request_to_primary(&req);
+   if (ret) {
+   RTE_LOG(ERR, EAL,
+   "Failed to send hotplug request to primary\n");
+   return ret;
+   }
+   if (req.result)
+   RTE_LOG(ERR, EAL,
+   "Failed to hotplug add device\n");
+   return req.result;
+   }
 
/**
 * attach a device from primary start from here:
@@ -279,8 +293,22 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
strlcpy(req.busname, busname, RTE_BUS_NAME_MAX_LEN);
strlcpy(req.devname, devname, RTE_DEV_NAME_MAX_LEN);
 
-   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-   return -ENOTSUP;
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /**
+* If in secondary process, just send IPC request to
+* primary process.
+*/
+   ret = eal_dev_hotplug_request_to_primary(&req);
+   if (ret) {
+   RTE_LOG(ERR, EAL,
+   "Failed to send hotplug request to primary\n");
+   return ret;
+   }
+   if (req.result)
+   RTE_LOG(ERR, EAL,
+   "Failed to hotplug remove device\n");
+   return req.result;
+   }
 
/**
 * detach a device from primary start from here:
diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c
index 261d17fe6..68ca18bbe 100644
--- a/lib/librte_eal/common/hotplug_mp.c
+++ b/lib/librte_eal/common/hotplug_mp.c
@@ -17,12 +17,158 @@ struct mp_reply_bundle {
void *peer;
 };
 
+/**
+ * Secondary to primary request.
+ * Starts from function eal_dev_hotplug_request_to_primary.
+ *
+ * device attach on secondary:
+ * a) secondary sends a sync request to the primary.
+ * b) primary receives the request and attaches the new device;
+ *    if that fails, go to i).
+ * c) primary forwards the attach sync request to all secondaries.
+ * d) each secondary receives the request, attaches the device and sends a reply.
+ * e) primary checks the replies; if all succeeded, go to j).
+ * f) primary sends an attach rollback sync request to all secondaries.
+ * g) each secondary receives the request, detaches the device and sends a reply.
+ * h) primary receives the replies and detaches the device as rollback action.
+ * i) send attach failure to the secondary as a reply to step a); go to k).
+ * j) send attach success to secondary as a reply of s

[dpdk-dev] [PATCH v9 12/19] net/null: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/null/rte_eth_null.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 1d2e6b9e9..2f040729b 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -623,6 +623,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+   eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -667,18 +668,31 @@ static int
 rte_pmd_null_remove(struct rte_vdev_device *dev)
 {
struct rte_eth_dev *eth_dev = NULL;
+   const char *name;
 
if (!dev)
return -EINVAL;
 
+   name = rte_vdev_device_name(dev);
+
PMD_LOG(INFO, "Closing null ethdev on numa socket %u",
rte_socket_id());
 
/* find the ethdev entry */
-   eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+   eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(dev)) == 0)
+   return rte_eth_dev_release_port_private(eth_dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
rte_free(eth_dev->data->dev_private);
 
rte_eth_dev_release_port(eth_dev);
-- 
2.13.6



[dpdk-dev] [PATCH v9 13/19] net/octeontx: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/octeontx/octeontx_ethdev.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index 1eb453b21..497bacdc6 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -1016,6 +1016,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,
 
eth_dev->tx_pkt_burst = octeontx_xmit_pkts;
eth_dev->rx_pkt_burst = octeontx_recv_pkts;
+   eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1138,6 +1139,18 @@ octeontx_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(dev)) == 0) {
+   rte_eth_dev_release_port_private(eth_dev);
+   continue;
+   }
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
nic = octeontx_pmd_priv(eth_dev);
rte_event_dev_stop(nic->evdev);
PMD_INIT_LOG(INFO, "Closing octeontx device %s", octtx_name);
@@ -1148,6 +1161,9 @@ octeontx_remove(struct rte_vdev_device *dev)
rte_event_dev_close(nic->evdev);
}
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   return 0;
+
/* Free FC resource */
octeontx_pko_fc_free();
 
-- 
2.13.6



[dpdk-dev] [PATCH v9 14/19] net/pcap: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/pcap/rte_eth_pcap.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 6bd4a7d79..6cc20c2b2 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -925,6 +925,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+   eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1016,6 +1017,7 @@ static int
 pmd_pcap_remove(struct rte_vdev_device *dev)
 {
struct rte_eth_dev *eth_dev = NULL;
+   const char *name;
 
PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
rte_socket_id());
@@ -1023,11 +1025,22 @@ pmd_pcap_remove(struct rte_vdev_device *dev)
if (!dev)
return -1;
 
+   name = rte_vdev_device_name(dev);
/* reserve an ethdev entry */
-   eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+   eth_dev = rte_eth_dev_allocated(name);
if (eth_dev == NULL)
return -1;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(dev)) == 0)
+   return rte_eth_dev_release_port_private(eth_dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
rte_free(eth_dev->data->dev_private);
 
rte_eth_dev_release_port(eth_dev);
-- 
2.13.6



[dpdk-dev] [PATCH v9 10/19] net/bonding: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index f155ff779..da45ba9ba 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3062,6 +3062,7 @@ bond_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &default_dev_ops;
+   eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -3168,6 +3169,16 @@ bond_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(dev)) == 0)
+   return rte_eth_dev_release_port_private(eth_dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
RTE_ASSERT(eth_dev->device == &dev->device);
 
internals = eth_dev->data->dev_private;
-- 
2.13.6



[dpdk-dev] [PATCH v9 15/19] net/softnic: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/softnic/rte_eth_softnic.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/softnic/rte_eth_softnic.c b/drivers/net/softnic/rte_eth_softnic.c
index 6b3c13e5c..a45a7b0dd 100644
--- a/drivers/net/softnic/rte_eth_softnic.c
+++ b/drivers/net/softnic/rte_eth_softnic.c
@@ -750,6 +750,7 @@ pmd_probe(struct rte_vdev_device *vdev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &pmd_ops;
+   eth_dev->device = &vdev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -803,17 +804,29 @@ pmd_remove(struct rte_vdev_device *vdev)
 {
struct rte_eth_dev *dev = NULL;
struct pmd_internals *p;
+   const char *name;
 
if (!vdev)
return -EINVAL;
 
-   PMD_LOG(INFO, "Removing device \"%s\"",
-   rte_vdev_device_name(vdev));
+   name = rte_vdev_device_name(vdev);
+   PMD_LOG(INFO, "Removing device \"%s\"", name);
 
/* Find the ethdev entry */
-   dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+   dev = rte_eth_dev_allocated(name);
if (dev == NULL)
return -ENODEV;
+
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(vdev)) == 0)
+   return rte_eth_dev_release_port_private(dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
p = dev->data->dev_private;
 
/* Free device data structures*/
-- 
2.13.6



[dpdk-dev] [PATCH v9 17/19] net/vhost: enable port detach on secondary process

2018-07-06 Thread Qi Zhang
Previously, detaching a port on a secondary process would mess up the
primary process and prevent the same device from being attached back
again. A secondary process should use rte_eth_dev_release_port_private
to release a port.

Signed-off-by: Qi Zhang 
---
 drivers/net/vhost/rte_eth_vhost.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index ba9d768a0..f773711b4 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -1353,6 +1353,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
}
/* TODO: request info from primary to set up Rx and Tx */
eth_dev->dev_ops = &ops;
+   eth_dev->device = &dev->device;
rte_eth_dev_probing_finish(eth_dev);
return 0;
}
@@ -1435,6 +1436,16 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
if (eth_dev == NULL)
return -ENODEV;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   /* detach device on the local process only */
+   if (strlen(rte_vdev_device_args(dev)) == 0)
+   return rte_eth_dev_release_port_private(eth_dev);
+   /**
+* else this is a private device for current process
+* so continue with normal detach scenario
+*/
+   }
+
eth_dev_close(eth_dev);
 
rte_free(vring_states[eth_dev->data->port_id]);
-- 
2.13.6



[dpdk-dev] [PATCH v9 18/19] examples/multi_process: add hotplug sample

2018-07-06 Thread Qi Zhang
The sample code demonstrates device (ethdev only) management in a
multi-process environment. The user can attach/detach a device on the
primary process and see it synchronized on the secondary process
automatically.

How to start?
./hotplug_mp --proc-type=auto

Command Line Example:

>help
>list

/* attach an af_packet vdev */
>attach net_af_packet,iface=eth0

/* detach port 0 */
>detach 0

Signed-off-by: Qi Zhang 
---
 examples/multi_process/Makefile  |   1 +
 examples/multi_process/hotplug_mp/Makefile   |  23 
 examples/multi_process/hotplug_mp/commands.c | 197 +++
 examples/multi_process/hotplug_mp/commands.h |  10 ++
 examples/multi_process/hotplug_mp/main.c |  41 ++
 5 files changed, 272 insertions(+)
 create mode 100644 examples/multi_process/hotplug_mp/Makefile
 create mode 100644 examples/multi_process/hotplug_mp/commands.c
 create mode 100644 examples/multi_process/hotplug_mp/commands.h
 create mode 100644 examples/multi_process/hotplug_mp/main.c

diff --git a/examples/multi_process/Makefile b/examples/multi_process/Makefile
index a6708b7e4..b76b02fcb 100644
--- a/examples/multi_process/Makefile
+++ b/examples/multi_process/Makefile
@@ -13,5 +13,6 @@ include $(RTE_SDK)/mk/rte.vars.mk
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += client_server_mp
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += simple_mp
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += symmetric_mp
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += hotplug_mp
 
 include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/multi_process/hotplug_mp/Makefile b/examples/multi_process/hotplug_mp/Makefile
new file mode 100644
index 0..c09a57bfa
--- /dev/null
+++ b/examples/multi_process/hotplug_mp/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = hotplug_mp
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/hotplug_mp/commands.c b/examples/multi_process/hotplug_mp/commands.c
new file mode 100644
index 0..fb7198d51
--- /dev/null
+++ b/examples/multi_process/hotplug_mp/commands.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+#include <rte_ethdev.h>
+#include <rte_dev.h>
+
+/**/
+
+struct cmd_help_result {
+   cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+   struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   cmdline_printf(cl,
+  "commands:\n"
+  "- attach \n"
+  "- detach \n"
+  "- list\n\n");
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+   TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+   .f = cmd_help_parsed,  /* function to call */
+   .data = NULL,  /* 2nd arg of func */
+   .help_str = "show help",
+   .tokens = {/* token list, NULL terminated */
+   (void *)&cmd_help_help,
+   NULL,
+   },
+};
+
+/**/
+
+struct cmd_quit_result {
+   cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+   struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+   TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+   .f = cmd_quit_parsed,  /* function to call */
+   .data = NULL,  /* 2nd arg of func */
+   .help_str = "quit",
+   .tokens = {/* token list, NULL terminated */
+   (void *)&cmd_quit_quit,
+   NULL,
+   },
+};
+
+/**/
+
+struct cmd_list_result {
+   cmdline_fixed_string_t list;
+};
+
+static void cmd_list_parsed(__attribute__((unused)) void *parsed_result,
+   struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   uint16_t port_id;
+   char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+   cmdline_printf(cl, "list all etherdev\n");
+
+   RTE_ETH_FOREACH_DEV(port_id) {
+   rte_eth_dev_get_name_by_port(port_id, dev_name);
+   if (strlen(dev_name) > 0)
+
