[dpdk-dev] [PATCH 1/2] testpmd: add forwarding mode to simulate a noisy neighbour

2018-04-19 Thread Jens Freimann
This adds a new forwarding mode to testpmd to simulate
more realistic behavior of a guest machine engaged in receiving
and sending packets performing Virtual Network Function (VNF).

The goal is to enable a simple way of measuring performance impact on
cache and memory footprint utilization from various VNF co-located on
the same host machine. For this it does:

* Buffer packets in a FIFO:

Create a fifo to buffer received packets. Once it flows over put
those packets into the actual tx queue. The fifo is created per tx
queue and its size can be set with the --buffersize-before-sending
commandline parameter.

A second commandline parameter is used to set a timeout in
milliseconds after which the fifo is flushed.

--noisy-buffersize-before-sending [packet numbers]
Keep the mbufs in a FIFO and forward the overflowing packets from the
FIFO. This queue is per TX-queue (after all other packet processing).

--noisy-flush-timeout [delay]
Flush the packet queue if no packets have been seen during
[delay]. As long as packets are seen, the timer is reset.

Add several options to simulate route lookups (memory reads) in tables
that can be quite large, as well as route hit statistics update.
These options simulate the whole stack traversal and
will trash the cache. Memory access is random.

* simulate route lookups:

Allocate a buffer and perform reads and writes on it as specified by
commandline options:

--noisy-memory-footprint [size]
Size of the VNF internal memory (MB), in which the random
read/write will be done, allocated by rte_malloc (hugepages).

--noisy-nb-rnd-write [num]
Number of random writes in memory per packet that should be
performed, simulating hit-flags update. 64 bits per write,
all writes in different cache lines.

--noisy-nb-rnd-read [num]
Number of random reads in memory per packet that should be
performed, simulating FIB/table lookups. 64 bits per read,
all reads in different cache lines.

--noisy-nb-rnd-read-write [num]
Number of random reads and writes in memory per packet should
be performed, simulating stats update. 64 bits per read-write, all
reads and writes in different cache lines.

Signed-off-by: Jens Freimann 
---
 app/test-pmd/Makefile |   1 +
 app/test-pmd/noisy_vnf.c  | 184 ++
 app/test-pmd/noisy_vnf.h  |  41 +++
 app/test-pmd/parameters.c |  55 +-
 app/test-pmd/testpmd.c|  68 +
 app/test-pmd/testpmd.h|  17 +
 6 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 app/test-pmd/noisy_vnf.c
 create mode 100644 app/test-pmd/noisy_vnf.h

diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile
index 60ae9b9c1..2dc07ab16 100644
--- a/app/test-pmd/Makefile
+++ b/app/test-pmd/Makefile
@@ -32,6 +32,7 @@ SRCS-y += rxonly.c
 SRCS-y += txonly.c
 SRCS-y += csumonly.c
 SRCS-y += icmpecho.c
+SRCS-y += noisy_vnf.c
 SRCS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ieee1588fwd.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_PMD_SOFTNIC)$(CONFIG_RTE_LIBRTE_SCHED),yy)
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
new file mode 100644
index 0..c480d31b2
--- /dev/null
+++ b/app/test-pmd/noisy_vnf.c
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Red Hat Corp.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "testpmd.h"
+#include "noisy_vnf.h"
+
+static inline void
+do_write(char *vnf_mem)
+{
+   uint64_t i = rte_rand();
+   uint64_t w = rte_rand();
+
+   vnf_mem[i % ((noisy_vnf_memory_footprint * 1024 * 1024) /
+   RTE_CACHE_LINE_SIZE)] = w;
+}
+
+   static inline void
+do_read(char *vnf_mem)
+{
+   uint64_t i = rte_rand();
+   uint64_t r;
+
+   r = vnf_mem[i % ((noisy_vnf_memory_footprint * 1024 * 1024) /
+   RTE_CACHE_LINE_SIZE)];
+   r++;
+}
+
+   static inline void
+do_rw(char *vnf_mem)
+{
+   do_read(vnf_mem);
+   do_write(vnf_mem);
+}
+
+/*
+ * Simulate route lookups as defined by commandline parameters
+ */
+   static void
+sim_memory_lookups(struct noisy_config *ncf, uint16_t nb_pkts)
+{
+   uint16_t i, j;
+
+   for (i = 0; i < nb_pkts; i++) {
+   for (j = 0; j < noisy_nb_rnd_write; j++)
+   do_write(ncf->vnf_mem);
+   for (j = 0; j < noisy_nb_rnd_read; j++)
+   do_read(ncf->vnf_mem);
+   for (j = 0; j < noisy_nb_rnd_read_write; j++)
+   do_rw(ncf->vnf_mem);
+   }
+}
+
+/*
+ * Forwarding of packets in noisy VNF mode.
+ * Packets are buffered in a per-TX-queue FIFO and random memory
+ * reads/writes are simulated per packet to mimic the cache and memory
+ * footprint of a noisy neighbour VNF.
+ */
+static void
+pkt_burst_noisy_vnf(struct fwd_stream *fs)
+{
+   struct rte_mbuf *pkts_burs

[dpdk-dev] [PATCH 2/2] testpmd: update testpmd documentation to include noisy forwarding mode

2018-04-19 Thread Jens Freimann
Signed-off-by: Jens Freimann 
---
 doc/guides/testpmd_app_ug/run_app.rst   | 27 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  7 +--
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/doc/guides/testpmd_app_ug/run_app.rst 
b/doc/guides/testpmd_app_ug/run_app.rst
index 1fd53958a..7fa5fc909 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -340,6 +340,7 @@ The commandline options are:
icmpecho
ieee1588
tm
+   noisy
 
 *   ``--rss-ip``
 
@@ -479,3 +480,29 @@ The commandline options are:
 
 Set the hexadecimal bitmask of TX queue offloads.
 The default value is 0.
+
+*   ``--noisy-buffersize-before-sending=N``
+
+Set the maximum number of elements of the FIFO queue to be created
+for buffering packets. Only available with the noisy forwarding mode.
+The default value is 0.
+
+*   ``--noisy-flush-timeout=N``
+
+Set the delay in milliseconds after which the FIFO queue of buffered
+packets is flushed if no new packets have been received.
+Only available with the noisy forwarding mode. The default value is 0.
+
+*   ``--noisy-nb-rnd-read=N``
+
+Set the number of reads to be done in noisy neighbour simulation memory 
buffer.
+Only available with the noisy forwarding mode. The default value is 0.
+
+*   ``--noisy-nb-rnd-write=N``
+
+Set the number of writes to be done in noisy neighbour simulation memory 
buffer.
+Only available with the noisy forwarding mode. The default value is 0.
+
+*   ``--noisy-nb-rnd-read-write=N``
+
+Set the number of r/w accesses to be done in noisy neighbour simulation
+memory buffer.
+Only available with the noisy forwarding mode. The default value is 0.
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index a766ac795..0c1f49ae3 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -289,7 +289,7 @@ set fwd
 Set the packet forwarding mode::
 
testpmd> set fwd (io|mac|macswap|flowgen| \
- rxonly|txonly|csum|icmpecho) (""|retry)
+ rxonly|txonly|csum|icmpecho|noisy) (""|retry)
 
 ``retry`` can be specified for forwarding engines except ``rx_only``.
 
@@ -327,8 +327,11 @@ The available information categories are:
   also modify the default hierarchy or specify the new hierarchy through CLI 
for
   implementing QoS scheduler.  Requires ``CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y`` 
``CONFIG_RTE_LIBRTE_SCHED=y``.
 
-Example::
+* ``noisy``: Noisy neighbour simulation.
+  Simulate more realistic behavior of a guest machine engaged in receiving
+  and sending packets performing Virtual Network Function (VNF).
 
+Example::
testpmd> set fwd rxonly
 
Set rxonly packet forwarding mode
-- 
2.14.3



[dpdk-dev] [PATCH v4 02/20] net/virtio: add virtio 1.1 defines

2018-04-19 Thread Jens Freimann
Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ring.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index cea4d441e..76168eed7 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -15,7 +15,11 @@
 #define VRING_DESC_F_WRITE  2
 /* This means the buffer contains a list of buffer descriptors. */
 #define VRING_DESC_F_INDIRECT   4
+/* This flag means the descriptor was made available by the driver */
 
+#define VRING_DESC_F_AVAIL (1ULL << 7)
+/* This flag means the descriptor was used by the device */
+#define VRING_DESC_F_USED  (1ULL << 15)
 /* The Host uses this in used->flags to advise the Guest: don't kick me
  * when you add a buffer.  It's unreliable, so it's simply an
  * optimization.  Guest will still kick if it's out of buffers. */
-- 
2.14.3



[dpdk-dev] [PATCH v4 01/20] net/virtio: vring init for packed queues

2018-04-19 Thread Jens Freimann
Add and initialize descriptor data structures.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ethdev.c | 22 ---
 drivers/net/virtio/virtio_pci.h|  8 ++
 drivers/net/virtio/virtio_ring.h   | 55 ++
 drivers/net/virtio/virtqueue.h | 10 +++
 4 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 41042cb23..0c9540b89 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -298,19 +298,21 @@ virtio_init_vring(struct virtqueue *vq)
 
PMD_INIT_FUNC_TRACE();
 
-   /*
-* Reinitialise since virtio port might have been stopped and restarted
-*/
memset(ring_mem, 0, vq->vq_ring_size);
-   vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
-   vq->vq_used_cons_idx = 0;
-   vq->vq_desc_head_idx = 0;
-   vq->vq_avail_idx = 0;
-   vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
+   vring_init(vq->hw, vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
+
vq->vq_free_cnt = vq->vq_nentries;
memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
+   vq->vq_used_cons_idx = 0;
+   vq->vq_avail_idx = 0;
+   if (vtpci_packed_queue(vq->hw)) {
+   vring_desc_init_packed(vr, size);
+   } else {
+   vq->vq_desc_head_idx = 0;
+   vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
 
-   vring_desc_init(vr->desc, size);
+   vring_desc_init(vr->desc, size);
+   }
 
/*
 * Disable device(host) interrupting guest
@@ -385,7 +387,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_queue_idx)
/*
 * Reserve a memzone for vring elements
 */
-   size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
+   size = vring_size(hw, vq_size, VIRTIO_PCI_VRING_ALIGN);
vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
 size, vq->vq_ring_size);
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index a28ba8339..528fb46b9 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -112,6 +112,8 @@ struct virtnet_ctl;
 
 #define VIRTIO_F_VERSION_1 32
 #define VIRTIO_F_IOMMU_PLATFORM33
+#define VIRTIO_F_RING_PACKED   34
+#define VIRTIO_F_IN_ORDER  35
 
 /*
  * Some VirtIO feature bits (currently bits 28 through 31) are
@@ -304,6 +306,12 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
return (hw->guest_features & (1ULL << bit)) != 0;
 }
 
+static inline int
+vtpci_packed_queue(struct virtio_hw *hw)
+{
+   return vtpci_with_feature(hw, VIRTIO_F_RING_PACKED);
+}
+
 /*
  * Function declaration from virtio_pci.c
  */
diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index 9e3c2a015..cea4d441e 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -54,11 +54,38 @@ struct vring_used {
struct vring_used_elem ring[0];
 };
 
+/* For support of packed virtqueues in Virtio 1.1 the format of descriptors
+ * looks like this.
+ */
+struct vring_desc_packed {
+   uint64_t addr;
+   uint32_t len;
+   uint16_t index;
+   uint16_t flags;
+};
+
+#define RING_EVENT_FLAGS_ENABLE 0x0
+#define RING_EVENT_FLAGS_DISABLE 0x1
+#define RING_EVENT_FLAGS_DESC 0x2
+struct vring_packed_desc_event {
+   uint16_t desc_event_off_wrap;
+   uint16_t desc_event_flags;
+};
+
 struct vring {
unsigned int num;
-   struct vring_desc  *desc;
-   struct vring_avail *avail;
-   struct vring_used  *used;
+   union {
+   struct vring_desc_packed *desc_packed;
+   struct vring_desc *desc;
+   };
+   union {
+   struct vring_avail *avail;
+   struct vring_packed_desc_event *driver_event;
+   };
+   union {
+   struct vring_used  *used;
+   struct vring_packed_desc_event *device_event;
+   };
 };
 
 /* The standard layout for the ring is a continuous chunk of memory which
@@ -95,10 +122,18 @@ struct vring {
 #define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
 
 static inline size_t
-vring_size(unsigned int num, unsigned long align)
+vring_size(struct virtio_hw *hw, unsigned int num, unsigned long align)
 {
size_t size;
 
+   if (vtpci_packed_queue(hw)) {
+   size = num * sizeof(struct vring_desc_packed);
+   size += sizeof(struct vring_packed_desc_event);
+   size = RTE_ALIGN_CEIL(size, align);
+   size += sizeof(struct vring_packed_desc_event);
+   return size;
+   }
+
size = num * sizeof(struct vring_desc);
size += sizeof(struct vri

[dpdk-dev] [PATCH v4 03/20] net/virtio: add packed virtqueue helpers

2018-04-19 Thread Jens Freimann
Add helper functions to set/clear and check descriptor flags.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ring.h | 38 ++
 drivers/net/virtio/virtqueue.h   | 19 +++
 2 files changed, 57 insertions(+)

diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index 76168eed7..4eed023cc 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -78,6 +78,7 @@ struct vring_packed_desc_event {
 
 struct vring {
unsigned int num;
+   unsigned int avail_wrap_counter;
union {
struct vring_desc_packed *desc_packed;
struct vring_desc *desc;
@@ -92,6 +93,43 @@ struct vring {
};
 };
 
+static inline void
+toggle_wrap_counter(struct vring *vr)
+{
+   vr->avail_wrap_counter ^= 1;
+}
+
+static inline void
+_set_desc_avail(struct vring_desc_packed *desc,
+  int wrap_counter)
+{
+   uint16_t flags = desc->flags;
+
+   if (wrap_counter) {
+   flags |= VRING_DESC_F_AVAIL;
+   flags &= ~VRING_DESC_F_USED;
+   } else {
+   flags &= ~VRING_DESC_F_AVAIL;
+   flags |= VRING_DESC_F_USED;
+   }
+
+   desc->flags = flags;
+}
+
+static inline void
+set_desc_avail(struct vring *vr,
+ struct vring_desc_packed *desc)
+{
+   _set_desc_avail(desc, vr->avail_wrap_counter);
+}
+
+static inline int
+desc_is_used(struct vring_desc_packed *desc)
+{
+   return !(desc->flags & VRING_DESC_F_AVAIL) ==
+   !(desc->flags & VRING_DESC_F_USED);
+}
+
 /* The standard layout for the ring is a continuous chunk of memory which
  * looks like this.  We assume num is a power of 2.
  *
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index cc2e7c0f6..081b27a52 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -245,6 +245,25 @@ struct virtio_tx_region {
   __attribute__((__aligned__(16)));
 };
 
+static inline uint16_t
+increment_pq_index(uint16_t idx, size_t ring_size)
+{
+   return ++idx >= ring_size ? 0 : idx;
+}
+
+static inline uint16_t
+update_pq_avail_index(struct virtqueue *vq)
+{
+   uint16_t idx;
+
+   idx = increment_pq_index(vq->vq_avail_idx, vq->vq_nentries);
+   if (idx == 0)
+   toggle_wrap_counter(&vq->vq_ring);
+   vq->vq_avail_idx = idx;
+
+   return vq->vq_avail_idx;
+}
+
 static inline void
 vring_desc_init_packed(struct vring *vr, int n)
 {
-- 
2.14.3



[dpdk-dev] [PATCH v4 00/20] implement packed virtqueues

2018-04-19 Thread Jens Freimann
This is a basic implementation of packed virtqueues as specified in the
Virtio 1.1 draft. A compiled version of the current draft is available
at https://github.com/oasis-tcs/virtio-docs.git (or as .pdf at
https://github.com/oasis-tcs/virtio-docs/blob/master/virtio-v1.1-packed-wd10.pdf

It does not implement yet indirect descriptors and checksum offloading.

A packed virtqueue is different from a split virtqueue in that it
consists of only a single descriptor ring that replaces available and
used ring, index and descriptor buffer.

Each descriptor is readable and writable and has a flags field. These flags
will mark if a descriptor is available or used.  To detect new available 
descriptors
even after the ring has wrapped, device and driver each have a
single-bit wrap counter that is flipped from 0 to 1 and vice versa every time
the last descriptor in the ring is used/made available.

The idea behind this is to 1. improve performance by avoiding cache misses
and 2. be easier for devices to implement.

Regarding performance: with these patches I get 21.13 Mpps on my system
as compared to 18.8 Mpps with the virtio 1.0 code. Packet size was 64
bytes, 0.05% acceptable loss.  Test setup is described as in
http://dpdk.org/doc/guides/howto/pvp_reference_benchmark.html

Packet generator:
MoonGen
Intel(R) Xeon(R) CPU E5-2665 0 @ 2.40GHz
Intel X710 NIC
RHEL 7.4

Device under test:
Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz
Intel X710 NIC
RHEL 7.4

VM on DuT: RHEL7.4

I plan to do more performance test with bigger frame sizes.

This patch series is based on a prototype implemented by Yuanhan Liu and
Tiwei Bie.



changes from v3->v4:
* added helpers to increment index and set available/used flags
* driver keeps track of number of descriptors used 
* change logic in set_rxtx_funcs()
* add patch for ctrl virtqueue with support for packed virtqueues
* rename virtio-1.1.h to virtio-packed.h
* fix wrong sizeof() in "vhost: vring address setup for packed queues"
* fix coding style of function definition in "net/virtio: add packed
  virtqueue helpers"
* fix padding in vring_size()
* move patches to enable packed virtqueues end of series
* v4 has two open problems: I'm sending it out anyway for feedback/help:
 * when VIRTIO_NET_F_MRG_RXBUF enabled only 128 packets are send in
   guest, i.e. when ring is full for the first time. I suspect a bug in
   setting the avail/used flags

changes from v2->v3:
* implement event suppression
* add code do dump packed virtqueues
* don't use assert in vhost code
* rename virtio-user parameter to packed-vq
* support rxvf flush 

changes from v1->v2:
* don't use VIRTQ_DESC_F_NEXT in used descriptors (Jason)
* no rte_panice() in guest triggerable code (Maxime)
* use unlikely when checking for vq (Maxime)
* rename everything from _1_1 to _packed  (Yuanhan)
* add two more patches to implement mergeable receive buffers 


Jens Freimann (16):
  net/virtio: vring init for packed queues
  net/virtio: add virtio 1.1 defines
  net/virtio: add packed virtqueue helpers
  net/virtio: flush packed receive virtqueues
  net/virtio: dump packed virtqueue data
  net/virtio: implement transmit path for packed queues
  net/virtio: add virtio send command packed queue support
  net/virtio: add support for mergeable buffers with packed virtqueues
  net/virtio: add support for event suppression
  vhost: add virtio packed virtqueue defines
  vhost: add helpers for packed virtqueues
  vhost: dequeue for packed queues
  vhost: packed queue enqueue path
  vhost: add support for mergeable buffers with packed virtqueues
  vhost: add event suppression for packed queues
  net/virtio: by default disable packed virtqueues

Yuanhan Liu (4):
  net/virtio-user: add option to use packed queues
  net/virtio: implement receive path for packed queues
  vhost: vring address setup for packed queues
  vhost: by default disable packed virtqueues

 config/common_base   |   2 +
 drivers/net/virtio/virtio_ethdev.c   | 127 +-
 drivers/net/virtio/virtio_ethdev.h   |   5 +
 drivers/net/virtio/virtio_pci.h  |   8 +
 drivers/net/virtio/virtio_ring.h |  97 -
 drivers/net/virtio/virtio_rxtx.c | 357 +++-
 drivers/net/virtio/virtio_user/virtio_user_dev.c |   6 +-
 drivers/net/virtio/virtio_user/virtio_user_dev.h |   3 +-
 drivers/net/virtio/virtio_user_ethdev.c  |  15 +-
 drivers/net/virtio/virtqueue.c   |  16 +
 drivers/net/virtio/virtqueue.h   | 113 -
 lib/librte_vhost/socket.c|   5 +
 lib/librte_vhost/vhost.c |  16 +
 lib/librte_vhost/vhost.h |  70 +++-
 lib/librte_vhost/vhost_user.c|  45 +-
 lib/librte_vhost/virtio-packed.h |  86 
 lib/librte_vhost/virtio_net.c| 503 +--
 17 files changed, 1398 insertions(+), 76 deletions(-)
 create 

[dpdk-dev] [PATCH v4 04/20] net/virtio: flush packed receive virtqueues

2018-04-19 Thread Jens Freimann
Flush used descriptors in packed receive virtqueue. As descriptors
can be chained we need to look at the stored number of used descriptors
to find out the length of the chain.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtqueue.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c
index a7d0a9cbe..f2747e612 100644
--- a/drivers/net/virtio/virtqueue.c
+++ b/drivers/net/virtio/virtqueue.c
@@ -58,12 +58,28 @@ virtqueue_detach_unused(struct virtqueue *vq)
 void
 virtqueue_rxvq_flush(struct virtqueue *vq)
 {
+   struct vring_desc_packed *descs = vq->vq_ring.desc_packed;
struct virtnet_rx *rxq = &vq->rxq;
struct virtio_hw *hw = vq->hw;
struct vring_used_elem *uep;
struct vq_desc_extra *dxp;
uint16_t used_idx, desc_idx;
uint16_t nb_used, i;
+   uint16_t size = vq->vq_nentries;
+
+   if (vtpci_packed_queue(vq->hw)) {
+   i = vq->vq_used_cons_idx;
+   while (desc_is_used(&descs[i])) {
+   dxp = &vq->vq_descx[i];
+   if (dxp->cookie != NULL)
+   rte_pktmbuf_free(dxp->cookie);
+   vq->vq_free_cnt += dxp->ndescs;
+   i = i + dxp->ndescs;
+   i = i >= size ? i - size : i;
+   dxp->ndescs = 0;
+   }
+   return;
+   }
 
nb_used = VIRTQUEUE_NUSED(vq);
 
-- 
2.14.3



[dpdk-dev] [PATCH v4 07/20] net/virtio: implement transmit path for packed queues

2018-04-19 Thread Jens Freimann
This implements the transmit path for devices with
support for Virtio 1.1.

Add the feature bit for Virtio 1.1 and enable code to
add buffers to vring and mark descriptors as available.

This is based on a patch by Yuanhan Liu.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ethdev.c |   8 ++-
 drivers/net/virtio/virtio_ethdev.h |   2 +
 drivers/net/virtio/virtio_rxtx.c   | 104 -
 3 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 0c9540b89..c5c2a268b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -383,6 +383,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_queue_idx)
vq->hw = hw;
vq->vq_queue_index = vtpci_queue_idx;
vq->vq_nentries = vq_size;
+   if (vtpci_packed_queue(hw))
+   vq->vq_ring.avail_wrap_counter = 1;
 
/*
 * Reserve a memzone for vring elements
@@ -1329,7 +1331,11 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = &virtio_recv_pkts;
}
 
-   if (hw->use_simple_tx) {
+   if (vtpci_packed_queue(hw)) {
+   PMD_INIT_LOG(INFO, "virtio: using virtio 1.1 Tx path on port 
%u",
+   eth_dev->data->port_id);
+   eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
+   } else if (hw->use_simple_tx) {
PMD_INIT_LOG(INFO, "virtio: using simple Tx path on port %u",
eth_dev->data->port_id);
eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index bb40064ea..5420d7648 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -85,6 +85,8 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
+   uint16_t nb_pkts);
 
 uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index a8aa87b32..b749babf3 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -38,6 +38,103 @@
 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
 #endif
 
+
+/* Cleanup from completed transmits. */
+static void
+virtio_xmit_cleanup_packed(struct virtqueue *vq)
+{
+   uint16_t idx;
+   uint16_t size = vq->vq_nentries;
+   struct vring_desc_packed *desc = vq->vq_ring.desc_packed;
+   struct vq_desc_extra *dxp;
+
+   idx = vq->vq_used_cons_idx;
+   while (desc_is_used(&desc[idx]) &&
+  vq->vq_free_cnt < size) {
+   dxp = &vq->vq_descx[idx];
+   vq->vq_free_cnt += dxp->ndescs;
+   idx = vq->vq_used_cons_idx + dxp->ndescs;
+   idx = idx >= size ? idx - size : idx;
+   }
+}
+
+uint16_t
+virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
+uint16_t nb_pkts)
+{
+   struct virtnet_tx *txvq = tx_queue;
+   struct virtqueue *vq = txvq->vq;
+   uint16_t i;
+   struct vring_desc_packed *desc = vq->vq_ring.desc_packed;
+   uint16_t idx;
+   struct vq_desc_extra *dxp;
+
+   if (unlikely(nb_pkts < 1))
+   return nb_pkts;
+
+   PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+
+   if (likely(vq->vq_free_cnt < vq->vq_free_thresh))
+   virtio_xmit_cleanup_packed(vq);
+
+   for (i = 0; i < nb_pkts; i++) {
+   struct rte_mbuf *txm = tx_pkts[i];
+   struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
+   uint16_t head_idx;
+   int wrap_counter;
+   int descs_used;
+
+   if (unlikely(txm->nb_segs + 1 > vq->vq_free_cnt)) {
+   virtio_xmit_cleanup_packed(vq);
+
+   if (unlikely(txm->nb_segs + 1 > vq->vq_free_cnt)) {
+   PMD_TX_LOG(ERR,
+  "No free tx descriptors to 
transmit");
+   break;
+   }
+   }
+
+   txvq->stats.bytes += txm->pkt_len;
+
+   vq->vq_free_cnt -= txm->nb_segs + 1;
+
+   wrap_counter = vq->vq_ring.avail_wrap_counter;
+   idx = update_pq_avail_index(vq);
+   head_idx = idx;
+
+   dxp = &vq->vq_descx[idx];
+   if (dxp->cookie != NULL)
+   rte_pktmbuf_free(dxp->cookie);
+   dxp->cookie = txm;
+
+   desc[idx].addr  = txvq->virtio_net_hdr_mem +
+ RTE_PTR_DIFF(

[dpdk-dev] [PATCH v4 06/20] net/virtio-user: add option to use packed queues

2018-04-19 Thread Jens Freimann
From: Yuanhan Liu 

Add option to enable packed queue support for virtio-user
devices.

Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_user/virtio_user_dev.c |  6 +-
 drivers/net/virtio/virtio_user/virtio_user_dev.h |  3 ++-
 drivers/net/virtio/virtio_user_ethdev.c  | 15 ++-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c 
b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 38b8bc90d..3c4034854 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -334,7 +334,8 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 
 int
 virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
-int cq, int queue_size, const char *mac, char **ifname)
+int cq, int queue_size, const char *mac, char **ifname,
+int packed_vq)
 {
snprintf(dev->path, PATH_MAX, "%s", path);
dev->max_queue_pairs = queues;
@@ -376,6 +377,9 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char 
*path, int queues,
dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
}
 
+   if (packed_vq > 0)
+   dev->device_features |= (1ull << VIRTIO_F_RING_PACKED);
+
if (dev->mac_specified)
dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
 
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h 
b/drivers/net/virtio/virtio_user/virtio_user_dev.h
index ade727e46..a6e42f93f 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -45,7 +45,8 @@ int is_vhost_user_by_type(const char *path);
 int virtio_user_start_device(struct virtio_user_dev *dev);
 int virtio_user_stop_device(struct virtio_user_dev *dev);
 int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
-int cq, int queue_size, const char *mac, char 
**ifname);
+int cq, int queue_size, const char *mac, char **ifname,
+int packed_vq);
 void virtio_user_dev_uninit(struct virtio_user_dev *dev);
 void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx);
 #endif
diff --git a/drivers/net/virtio/virtio_user_ethdev.c 
b/drivers/net/virtio/virtio_user_ethdev.c
index 4e7b3c34f..579583a59 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -341,6 +341,8 @@ static const char *valid_args[] = {
VIRTIO_USER_ARG_INTERFACE_NAME,
 #define VIRTIO_USER_ARG_SERVER_MODE "server"
VIRTIO_USER_ARG_SERVER_MODE,
+#define VIRTIO_USER_ARG_PACKED_VQ "packed_vq"
+   VIRTIO_USER_ARG_PACKED_VQ,
NULL
 };
 
@@ -447,6 +449,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
char *ifname = NULL;
char *mac_addr = NULL;
int ret = -1;
+   uint64_t packed_vq = 0;
 
kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
if (!kvlist) {
@@ -530,6 +533,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
cq = 1;
}
 
+   if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PACKED_VQ) == 1) {
+   if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PACKED_VQ,
+  &get_integer_arg, &packed_vq) < 0) {
+   PMD_INIT_LOG(ERR, "error to parse %s",
+VIRTIO_USER_ARG_PACKED_VQ);
+   goto end;
+   }
+   }
+
if (queues > 1 && cq == 0) {
PMD_INIT_LOG(ERR, "multi-q requires ctrl-q");
goto end;
@@ -558,7 +570,8 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
else
vu_dev->is_server = false;
if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
-queue_size, mac_addr, &ifname) < 0) {
+queue_size, mac_addr, &ifname,
+packed_vq) < 0) {
PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
virtio_user_eth_dev_free(eth_dev);
goto end;
-- 
2.14.3



[dpdk-dev] [PATCH v4 05/20] net/virtio: dump packed virtqueue data

2018-04-19 Thread Jens Freimann
Add support to dump packed virtqueue data to the
VIRTQUEUE_DUMP() macro.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtqueue.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 081b27a52..ea804c9c7 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -379,6 +379,12 @@ virtqueue_notify(struct virtqueue *vq)
 
 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTQUEUE_DUMP(vq) do { \
+   if (vtpci_packed_queue((vq)->hw)) { \
+ PMD_INIT_LOG(DEBUG, \
+ "VQ: - size=%d; free=%d; last_used_idx=%d;" \
+ (vq)->vq_nentries, (vq)->vq_free_cnt, nused); \
+ break; \
+   } \
uint16_t used_idx, nused; \
used_idx = (vq)->vq_ring.used->idx; \
nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
-- 
2.14.3



[dpdk-dev] [PATCH v4 08/20] net/virtio: implement receive path for packed queues

2018-04-19 Thread Jens Freimann
From: Yuanhan Liu 

Implement the receive part here. No support for mergeable buffers yet.

Signed-off-by: Jens Freimann 
Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_ethdev.c |  14 +++-
 drivers/net/virtio/virtio_ethdev.h |   2 +
 drivers/net/virtio/virtio_rxtx.c   | 137 -
 3 files changed, 150 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index c5c2a268b..e4c039a48 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1316,10 +1316,19 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 {
struct virtio_hw *hw = eth_dev->data->dev_private;
 
-   if (hw->use_simple_rx) {
+   /*
+* workaround for packed vqs which don't support
+* mrg_rxbuf at this point
+*/
+   if (vtpci_packed_queue(hw) &&
+   vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
+   } else if (hw->use_simple_rx) {
PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
eth_dev->data->port_id);
eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
+   } else if (vtpci_packed_queue(hw)) {
+   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO,
"virtio: using mergeable buffer Rx path on port %u",
@@ -1475,7 +1484,8 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t 
req_features)
 
/* Setting up rx_header size for the device */
if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
-   vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
+   vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
+   vtpci_with_feature(hw, VIRTIO_F_RING_PACKED))
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
else
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 5420d7648..cb1399b3b 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -79,6 +79,8 @@ int virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
 
 uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
+uint16_t virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
+   uint16_t nb_pkts);
 
 uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index b749babf3..6f6807547 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -31,6 +31,7 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"
 #include "virtio_rxtx_simple.h"
+#include "virtio_ring.h"
 
 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
@@ -523,10 +524,38 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, 
uint16_t queue_idx)
struct virtnet_rx *rxvq = &vq->rxq;
struct rte_mbuf *m;
uint16_t desc_idx;
-   int error, nbufs;
+   int error, nbufs = 0;
 
PMD_INIT_FUNC_TRACE();
 
+   if (vtpci_packed_queue(hw)) {
+   struct vring_desc_packed *desc;
+   struct vq_desc_extra *dxp;
+
+   for (desc_idx = 0; desc_idx < vq->vq_nentries;
+   desc_idx++) {
+   m = rte_mbuf_raw_alloc(rxvq->mpool);
+   if (unlikely(m == NULL))
+   return -ENOMEM;
+
+   dxp = &vq->vq_descx[desc_idx];
+   dxp->cookie = m;
+   dxp->ndescs = 1;
+
+   desc = &vq->vq_ring.desc_packed[desc_idx];
+   desc->addr = VIRTIO_MBUF_ADDR(m, vq) +
+   RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
+   desc->len = m->buf_len - RTE_PKTMBUF_HEADROOM +
+   hw->vtnet_hdr_size;
+   desc->flags |= VRING_DESC_F_WRITE;
+   rte_smp_wmb();
+   set_desc_avail(&vq->vq_ring, desc);
+   }
+   toggle_wrap_counter(&vq->vq_ring);
+   nbufs = desc_idx;
+   goto out;
+   }
+
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
 
@@ -571,6 +600,7 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, 
uint16_t queue_idx)
vq_update_avail_idx(vq);
}
 
+out:
PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
 
VIRTQUEUE_DUMP(vq);
@@ -801,6 +831,111 @@ rx_offload_enabled(struct virtio_hw *hw)
 

[dpdk-dev] [PATCH v4 09/20] net/virtio: add virtio send command packed queue support

2018-04-19 Thread Jens Freimann
Use packed virtqueue format when reading and writing descriptors
to/from the ring.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ethdev.c | 75 ++
 1 file changed, 75 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index e4c039a48..b9fada638 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -140,6 +140,75 @@ static const struct rte_virtio_xstats_name_off 
rte_virtio_txq_stat_strings[] = {
 
 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
 
+static struct virtio_pmd_ctrl *
+virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
+   int *dlen, int pkt_num)
+{
+   struct virtqueue *vq = cvq->vq;
+   int head_idx = (vq->vq_avail_idx++) & (vq->vq_nentries -1);
+   struct vring_desc_packed *desc = vq->vq_ring.desc_packed;
+   int used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
+   int flags = desc[head_idx].flags;
+   struct virtio_pmd_ctrl *result;
+   int wrap_counter;
+   int sum = 0;
+   int i;
+
+   wrap_counter = vq->vq_ring.avail_wrap_counter;
+   if ((head_idx & (vq->vq_nentries - 1)) == 0)
+   toggle_wrap_counter(&vq->vq_ring);
+
+   for (i = 0; i < pkt_num; i++) {
+   flags |= VRING_DESC_F_NEXT;
+   desc[head_idx].addr = cvq->virtio_net_hdr_mem
+   + sizeof(struct virtio_net_ctrl_hdr)
+   + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
+   desc[head_idx].len = dlen[i];
+   rte_smp_wmb();
+   desc[head_idx].flags = flags;
+   sum += dlen[i];
+   vq->vq_free_cnt--;
+   _set_desc_avail(&desc[head_idx], wrap_counter);
+   head_idx = (vq->vq_avail_idx++) & (vq->vq_nentries - 1);
+   if ((head_idx & (vq->vq_nentries - 1)) == 0)
+   toggle_wrap_counter(&vq->vq_ring);
+   }
+
+
+   flags = VRING_DESC_F_WRITE;
+   desc[head_idx].addr = cvq->virtio_net_hdr_mem
+   + sizeof(struct virtio_net_ctrl_hdr);
+   desc[head_idx].len = sizeof(ctrl->status);
+   vq->vq_free_cnt--;
+   rte_smp_wmb();
+   _set_desc_avail(&desc[head_idx], wrap_counter);
+   desc[head_idx].flags = flags;
+
+   virtqueue_notify(vq);
+
+   rte_rmb();
+   /* wait for used descriptors in virtqueue */
+   while(!desc_is_used(&desc[head_idx])) {
+   rte_rmb();
+   usleep(100);
+   }
+
+   /* now get used descriptors */
+   while(desc_is_used(&desc[used_idx])) {
+   used_idx = (vq->vq_used_cons_idx++) & (vq->vq_nentries - 1);
+   vq->vq_free_cnt++;
+   rte_smp_wmb();
+   set_desc_avail(&vq->vq_ring, &desc[used_idx]);
+   if ((used_idx & (vq->vq_nentries - 1)) == 0)
+   toggle_wrap_counter(&vq->vq_ring);
+}
+
+   vq->vq_used_cons_idx = used_idx;
+
+   result = cvq->virtio_net_hdr_mz->addr;
+   return result;
+}
+
 static int
 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
int *dlen, int pkt_num)
@@ -173,6 +242,11 @@ virtio_send_command(struct virtnet_ctl *cvq, struct 
virtio_pmd_ctrl *ctrl,
memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
sizeof(struct virtio_pmd_ctrl));
 
+   if (vtpci_packed_queue(vq->hw)) {
+   result = virtio_pq_send_command(cvq, ctrl, dlen, pkt_num);
+   goto out_unlock;
+   }
+
/*
 * Format is enforced in qemu code:
 * One TX packet for header;
@@ -244,6 +318,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct 
virtio_pmd_ctrl *ctrl,
 
result = cvq->virtio_net_hdr_mz->addr;
 
+out_unlock:
rte_spinlock_unlock(&cvq->lock);
return result->status;
 }
-- 
2.14.3



[dpdk-dev] [PATCH v4 12/20] vhost: add virtio packed virtqueue defines

2018-04-19 Thread Jens Freimann
Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.h |  4 
 lib/librte_vhost/virtio-packed.h | 22 ++
 2 files changed, 26 insertions(+)
 create mode 100644 lib/librte_vhost/virtio-packed.h

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index c9b64461d..94bb07d84 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -177,6 +177,10 @@ struct vhost_msg {
 #ifndef VIRTIO_F_VERSION_1
  #define VIRTIO_F_VERSION_1 32
 #endif
+#ifndef VIRTIO_F_RING_PACKED
+ #define VIRTIO_F_RING_PACKED 34
+#endif
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
 
 /* Features supported by this builtin vhost-user net driver. */
 #define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
diff --git a/lib/librte_vhost/virtio-packed.h b/lib/librte_vhost/virtio-packed.h
new file mode 100644
index 0..744b3991b
--- /dev/null
+++ b/lib/librte_vhost/virtio-packed.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) Red Hat Inc.
+ */
+
+#ifndef __VIRTIO_PACKED_H
+#define __VIRTIO_PACKED_H
+
+#define VRING_DESC_F_NEXT   1
+#define VRING_DESC_F_WRITE  2
+#define VRING_DESC_F_INDIRECT   4
+
+#define VRING_DESC_F_AVAIL  (1ULL << 7)
+#define VRING_DESC_F_USED  (1ULL << 15)
+
+struct vring_desc_packed {
+   uint64_t addr;
+   uint32_t len;
+   uint16_t index;
+   uint16_t flags;
+};
+
+#endif /* __VIRTIO_PACKED_H */
-- 
2.14.3



[dpdk-dev] [PATCH v4 11/20] net/virtio: add support for event suppression

2018-04-19 Thread Jens Freimann
Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ethdev.c |  2 +-
 drivers/net/virtio/virtio_rxtx.c   | 15 +++-
 drivers/net/virtio/virtqueue.h | 77 --
 3 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index f9af3fcdb..30c04aa19 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -802,7 +802,7 @@ virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, 
uint16_t queue_id)
struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
struct virtqueue *vq = rxvq->vq;
 
-   virtqueue_enable_intr(vq);
+   virtqueue_enable_intr(vq, 0, 0);
return 0;
 }
 
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 42b1d5997..a6b24ea64 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -128,6 +128,10 @@ virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf 
**tx_pkts,
rte_smp_wmb();
_set_desc_avail(&desc[head_idx], wrap_counter);
vq->vq_descx[head_idx].ndescs = descs_used;
+   if (unlikely(virtqueue_kick_prepare_packed(vq))) {
+   virtqueue_notify(vq);
+   PMD_RX_LOG(DEBUG, "Notified");
+   }
}
 
txvq->stats.packets += i;
@@ -1003,6 +1007,10 @@ virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf 
**rx_pkts,
}
 
rxvq->stats.packets += nb_rx;
+   if (nb_rx > 0 && unlikely(virtqueue_kick_prepare_packed(vq))) {
+   virtqueue_notify(vq);
+   PMD_RX_LOG(DEBUG, "Notified");
+   }
 
vq->vq_used_cons_idx = used_idx;
 
@@ -1280,8 +1288,13 @@ virtio_recv_mergeable_pkts(void *rx_queue,
 
rxvq->stats.packets += nb_rx;
 
-   if (vtpci_packed_queue(vq->hw))
+   if (vtpci_packed_queue(vq->hw)) {
+   if (unlikely(virtqueue_kick_prepare(vq))) {
+   virtqueue_notify(vq);
+   PMD_RX_LOG(DEBUG, "Notified");
+   }
return nb_rx;
+   }
 
/* Allocate new mbuf for the used descriptor */
error = ENOSPC;
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 0df845e0b..96152ac76 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -176,6 +176,8 @@ struct virtqueue {
uint16_t vq_free_cnt;  /**< num of desc available */
uint16_t vq_avail_idx; /**< sync until needed */
uint16_t vq_free_thresh; /**< free threshold */
+   uint16_t vq_signalled_avail;
+   int vq_signalled_avail_valid;
 
void *vq_ring_virt_mem;  /**< linear address of vring*/
unsigned int vq_ring_size;
@@ -292,16 +294,37 @@ vring_desc_init(struct vring_desc *dp, uint16_t n)
 static inline void
 virtqueue_disable_intr(struct virtqueue *vq)
 {
-   vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+   if (vtpci_packed_queue(vq->hw) && vtpci_with_feature(vq->hw,
+   VIRTIO_RING_F_EVENT_IDX))
+   vq->vq_ring.device_event->desc_event_flags =
+   RING_EVENT_FLAGS_DISABLE;
+   else
+   vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 }
 
 /**
  * Tell the backend to interrupt us.
  */
 static inline void
-virtqueue_enable_intr(struct virtqueue *vq)
+virtqueue_enable_intr(struct virtqueue *vq, uint16_t off, uint16_t 
wrap_counter)
 {
-   vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
+   uint16_t *flags = &vq->vq_ring.device_event->desc_event_flags;
+   uint16_t *event_off_wrap =
+   &vq->vq_ring.device_event->desc_event_off_wrap;
+   if (vtpci_packed_queue(vq->hw)) {
+   *flags = 0;
+   *event_off_wrap = 0;
+   if (*event_off_wrap & RING_EVENT_FLAGS_DESC) {
+   *event_off_wrap = off | 0x7FFF;
+   *event_off_wrap |= wrap_counter << 15;
+   *flags |= RING_EVENT_FLAGS_DESC;
+   } else {
+   *event_off_wrap = 0;
+   }
+   *flags |= RING_EVENT_FLAGS_ENABLE;
+   } else {
+   vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
+   }
 }
 
 /**
@@ -361,12 +384,60 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t 
desc_idx)
vq->vq_avail_idx++;
 }
 
+static int vhost_idx_diff(struct virtqueue *vq, uint16_t old, uint16_t new)
+{
+   if (new > old)
+   return new - old;
+   return  (new + vq->vq_nentries - old);
+}
+
+static int vring_packed_need_event(struct virtqueue *vq,
+   uint16_t event_off, uint16_t new,
+   uint16_t old)
+{
+   return (uint16_t)(vhost_idx_diff(vq, new, event_off) - 1) <
+   (uint16_t)vhost_idx_diff(vq, new, old);
+}
+
+

[dpdk-dev] [PATCH v4 10/20] net/virtio: add support for mergeable buffers with packed virtqueues

2018-04-19 Thread Jens Freimann
Implement support for receiving merged buffers in virtio when packed
virtqueues are enabled.

Signed-off-by: Jens Freimann 
---
 drivers/net/virtio/virtio_ethdev.c |  14 ++---
 drivers/net/virtio/virtio_rxtx.c   | 103 ++---
 drivers/net/virtio/virtqueue.h |   1 +
 3 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index b9fada638..f9af3fcdb 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1391,19 +1391,15 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 {
struct virtio_hw *hw = eth_dev->data->dev_private;
 
-   /*
-* workarount for packed vqs which don't support
-* mrg_rxbuf at this point
-*/
-   if (vtpci_packed_queue(hw) &&
-   vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
-   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
-   } else if (hw->use_simple_rx) {
+   if (hw->use_simple_rx) {
PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
eth_dev->data->port_id);
eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
} else if (vtpci_packed_queue(hw)) {
-   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
+   if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+   eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
+   else
+   eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
} else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
PMD_INIT_LOG(INFO,
"virtio: using mergeable buffer Rx path on port %u",
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 6f6807547..42b1d5997 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -179,6 +179,79 @@ vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
dp->next = VQ_RING_DESC_CHAIN_END;
 }
 
+static void
+virtio_refill_packed(struct virtqueue *vq, uint16_t used_idx,
+struct virtnet_rx *rxvq)
+{
+   struct vq_desc_extra *dxp;
+   struct vring_desc_packed *descs = vq->vq_ring.desc_packed;
+   struct vring_desc_packed *desc;
+   struct rte_mbuf *nmb;
+
+   nmb = rte_mbuf_raw_alloc(rxvq->mpool);
+   if (unlikely(nmb == NULL)) {
+   struct rte_eth_dev *dev
+   = &rte_eth_devices[rxvq->port_id];
+   dev->data->rx_mbuf_alloc_failed++;
+   return;
+   }
+
+   desc = &descs[used_idx];
+
+   dxp = &vq->vq_descx[used_idx];
+
+   dxp->cookie = nmb;
+   dxp->ndescs = 1;
+
+   desc->addr = VIRTIO_MBUF_ADDR(nmb, vq) +
+   RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size;
+   desc->len = nmb->buf_len - RTE_PKTMBUF_HEADROOM +
+   vq->hw->vtnet_hdr_size;
+   desc->flags |= VRING_DESC_F_WRITE;
+}
+
+static uint16_t
+virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
+ struct rte_mbuf **rx_pkts,
+ uint32_t *len,
+ uint16_t num,
+ struct virtnet_rx *rx_queue)
+{
+   struct rte_mbuf *cookie;
+   uint16_t used_idx;
+   struct vring_desc_packed *desc;
+   uint16_t i;
+
+   for (i = 0; i < num; i++) {
+   used_idx = vq->vq_used_cons_idx;
+   desc = &vq->vq_ring.desc_packed[used_idx];
+   if (!desc_is_used(desc))
+   return i;
+   len[i] = desc->len;
+   cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
+
+   if (unlikely(cookie == NULL)) {
+   PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie 
at %u",
+   vq->vq_used_cons_idx);
+   break;
+   }
+   rte_prefetch0(cookie);
+   rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
+   rx_pkts[i] = cookie;
+
+   virtio_refill_packed(vq, used_idx, rx_queue);
+
+   rte_smp_wmb();
+   if (vq->vq_used_cons_idx == 0)
+   toggle_wrap_counter(&vq->vq_ring);
+   set_desc_avail(&vq->vq_ring, desc);
+   vq->vq_used_cons_idx = increment_pq_index(vq->vq_used_cons_idx,
+ vq->vq_nentries);
+   }
+
+   return i;
+}
+
 static uint16_t
 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
   uint32_t *len, uint16_t num)
@@ -1068,12 +1141,16 @@ virtio_recv_mergeable_pkts(void *rx_queue,
uint32_t seg_res;
uint32_t hdr_size;
int offload;
+   uint32_t rx_num = 0;
 
nb_rx = 0;
if (unlikely(hw->st

[dpdk-dev] [PATCH v4 13/20] vhost: add helpers for packed virtqueues

2018-04-19 Thread Jens Freimann
Add some helper functions to set/check descriptor flags
and toggle the used wrap counter.

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/virtio-packed.h | 64 
 1 file changed, 64 insertions(+)

diff --git a/lib/librte_vhost/virtio-packed.h b/lib/librte_vhost/virtio-packed.h
index 744b3991b..cd272d456 100644
--- a/lib/librte_vhost/virtio-packed.h
+++ b/lib/librte_vhost/virtio-packed.h
@@ -19,4 +19,68 @@ struct vring_desc_packed {
uint16_t flags;
 };
 
+static inline uint64_t
+vq_is_packed(struct virtio_net *dev)
+{
+   return dev->features & (1ull << VIRTIO_F_RING_PACKED);
+}
+
+static inline void
+toggle_wrap_counter(struct vhost_virtqueue *vq)
+{
+   vq->used_wrap_counter ^= 1;
+}
+
+static inline uint16_t
+increase_index (uint16_t index, uint32_t size)
+{
+   return ++index >= size ? 0 : index;
+}
+
+static inline uint16_t
+update_index (struct vhost_virtqueue *vq, uint16_t index, uint32_t size) {
+   index = increase_index(index, size);
+   if (increase_index(index, size) == 0)
+   toggle_wrap_counter(vq);
+   return index;
+}
+
+static inline int
+desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
+{
+   if (vq->used_wrap_counter == 1) {
+   if ((desc->flags & VRING_DESC_F_AVAIL) &&
+   !(desc->flags & VRING_DESC_F_USED))
+   return 1;
+   }
+   if (vq->used_wrap_counter == 0) {
+   if (!(desc->flags & VRING_DESC_F_AVAIL) &&
+   (desc->flags & VRING_DESC_F_USED))
+   return 1;
+   }
+   return 0;
+}
+
+static inline void
+_set_desc_used(struct vring_desc_packed *desc, int wrap_counter)
+{
+   uint16_t flags = desc->flags;
+
+   if (wrap_counter == 1) {
+   flags |= VRING_DESC_F_USED;
+   flags |= VRING_DESC_F_AVAIL;
+   } else {
+   flags &= ~VRING_DESC_F_USED;
+   flags &= ~VRING_DESC_F_AVAIL;
+   }
+
+   desc->flags = flags;
+}
+
+static inline void
+set_desc_used(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
+{
+   _set_desc_used(desc, vq->used_wrap_counter);
+}
+
 #endif /* __VIRTIO_PACKED_H */
-- 
2.14.3



[dpdk-dev] [PATCH v4 15/20] vhost: dequeue for packed queues

2018-04-19 Thread Jens Freimann
Implement code to dequeue and process descriptors from
the vring if VIRTIO_F_RING_PACKED is enabled.

Check whether a descriptor was made available by the driver by looking at
the VRING_DESC_F_AVAIL flag in the descriptor. If so, dequeue it and set
the VRING_DESC_F_USED flag according to the current value of the
used wrap counter.

Used ring wrap counter needs to be toggled when last descriptor is
written out. This allows the host/guest to detect new descriptors even
after the ring has wrapped.

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.c  |   1 +
 lib/librte_vhost/virtio_net.c | 220 ++
 2 files changed, 221 insertions(+)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 5ddf55ed9..f7989cfbd 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
 
vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+   vq->used_wrap_counter = 1;
 
vhost_user_iotlb_init(dev, vring_idx);
/* Backends are set to -1 indicating an inactive device. */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ed7198dbb..9a9ff92f9 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -19,6 +19,7 @@
 
 #include "iotlb.h"
 #include "vhost.h"
+#include "virtio-packed.h"
 
 #define MAX_PKT_BURST 32
 
@@ -1118,6 +1119,221 @@ restore_mbuf(struct rte_mbuf *m)
}
 }
 
+static inline uint16_t
+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+struct vring_desc_packed *descs)
+{
+   struct vring_desc_packed *desc;
+   uint64_t desc_addr;
+   uint32_t desc_avail, desc_offset;
+   uint32_t mbuf_avail, mbuf_offset;
+   uint32_t cpy_len;
+   struct rte_mbuf *cur = m, *prev = m;
+   struct virtio_net_hdr *hdr = NULL;
+   uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
+   int wrap_counter = vq->used_wrap_counter;
+   int rc = 0;
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
+   desc = &descs[vq->last_used_idx & (vq->size - 1)];
+   if (unlikely((desc->len < dev->vhost_hlen)) ||
+   (desc->flags & VRING_DESC_F_INDIRECT)) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "INDIRECT not supported yet\n");
+   rc = -1;
+   goto out;
+   }
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RO);
+
+   if (unlikely(!desc_addr)) {
+   rc = -1;
+   goto out;
+   }
+
+   if (virtio_net_with_host_offload(dev)) {
+   hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+   rte_prefetch0(hdr);
+   }
+
+   /*
+* A virtio driver normally uses at least 2 desc buffers
+* for Tx: the first for storing the header, and others
+* for storing the data.
+*/
+   if (likely((desc->len == dev->vhost_hlen) &&
+  (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+   if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+   toggle_wrap_counter(vq);
+
+   desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RO);
+   if (unlikely(!desc_addr)) {
+   rc = -1;
+   goto out;
+   }
+
+   desc_offset = 0;
+   desc_avail  = desc->len;
+   } else {
+   desc_avail  = desc->len - dev->vhost_hlen;
+   desc_offset = dev->vhost_hlen;
+   }
+
+   rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+   PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+   mbuf_offset = 0;
+   mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+   while (1) {
+   uint64_t hpa;
+
+   cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+   /*
+* A desc buf might across two host physical pages that are
+* not continuous. In such case (gpa_to_hpa returns 0), data
+* will be copied even though zero copy is enabled.
+*/
+   if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+   desc->addr + desc_offset, cpy_len {
+   cur->data_len = cpy_len;
+   cur->data_off = 0;
+   cur->buf_addr = (void *)(uintptr_t)desc_addr;
+   cur->buf_physaddr = hpa;
+
+   /*
+ 

[dpdk-dev] [PATCH v4 14/20] vhost: vring address setup for packed queues

2018-04-19 Thread Jens Freimann
From: Yuanhan Liu 

Add code to set up packed queues when enabled.

Signed-off-by: Yuanhan Liu 
Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.h  |  2 ++
 lib/librte_vhost/vhost_user.c | 23 ++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 94bb07d84..272d45f54 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -74,6 +74,7 @@ struct batch_copy_elem {
  */
 struct vhost_virtqueue {
struct vring_desc   *desc;
+   struct vring_desc_packed   *desc_packed;
struct vring_avail  *avail;
struct vring_used   *used;
uint32_tsize;
@@ -111,6 +112,7 @@ struct vhost_virtqueue {
 
struct batch_copy_elem  *batch_copy_elems;
uint16_tbatch_copy_nb_elems;
+   uint16_tused_wrap_counter;
 
rte_rwlock_tiotlb_lock;
rte_rwlock_tiotlb_pending_lock;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index a3dccf67b..27b10c00c 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -39,6 +39,7 @@
 #include "iotlb.h"
 #include "vhost.h"
 #include "vhost_user.h"
+#include "virtio-packed.h"
 
 #define VIRTIO_MIN_MTU 68
 #define VIRTIO_MAX_MTU 65535
@@ -471,6 +472,24 @@ translate_ring_addresses(struct virtio_net *dev, int 
vq_index)
struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
struct vhost_vring_addr *addr = &vq->ring_addrs;
 
+   if (vq_is_packed(dev)) {
+   vq->desc_packed = (struct vring_desc_packed *) ring_addr_to_vva
+   (dev, vq, addr->desc_user_addr,
+sizeof(struct vring_desc_packed));
+   vq->desc = NULL;
+   vq->avail = NULL;
+   vq->used = NULL;
+   vq->log_guest_addr = 0;
+
+   if (vq->last_used_idx != 0) {
+   RTE_LOG(WARNING, VHOST_CONFIG,
+   "last_used_idx (%u) not 0\n",
+   vq->last_used_idx);
+   vq->last_used_idx = 0;
+   }
+   return dev;
+   }
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
@@ -483,6 +502,7 @@ translate_ring_addresses(struct virtio_net *dev, int 
vq_index)
dev->vid);
return dev;
}
+   vq->desc_packed = NULL;
 
dev = numa_realloc(dev, vq_index);
vq = dev->virtqueue[vq_index];
@@ -855,7 +875,8 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct 
VhostUserMsg *pmsg)
 static int
 vq_is_ready(struct vhost_virtqueue *vq)
 {
-   return vq && vq->desc && vq->avail && vq->used &&
+   return vq &&
+  (vq->desc_packed || (vq->desc && vq->avail && vq->used)) &&
   vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
   vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
 }
-- 
2.14.3



[dpdk-dev] [PATCH v4 16/20] vhost: packed queue enqueue path

2018-04-19 Thread Jens Freimann
Implement enqueue of packets to the receive virtqueue.

Set the descriptor flag VRING_DESC_F_USED and toggle the used wrap counter
when the last descriptor in the ring is used. Perform a write memory
barrier before the flags are written to the descriptor.

Chained descriptors are not supported with this patch.

Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/virtio_net.c | 129 ++
 1 file changed, 129 insertions(+)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 9a9ff92f9..1c7664a7d 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -695,6 +695,135 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
return pkt_idx;
 }
 
+static inline uint32_t __attribute__((always_inline))
+vhost_enqueue_burst_packed(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+   struct vhost_virtqueue *vq;
+   struct vring_desc_packed *descs;
+   uint16_t idx;
+   uint16_t mask;
+   uint16_t i;
+
+   vq = dev->virtqueue[queue_id];
+
+   rte_spinlock_lock(&vq->access_lock);
+
+   if (unlikely(vq->enabled == 0)) {
+   i = 0;
+   goto out_access_unlock;
+   }
+
+   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+   vhost_user_iotlb_rd_lock(vq);
+
+   descs = vq->desc_packed;
+   mask = vq->size - 1;
+
+   for (i = 0; i < count; i++) {
+   uint32_t desc_avail, desc_offset;
+   uint32_t mbuf_avail, mbuf_offset;
+   uint32_t cpy_len;
+   struct vring_desc_packed *desc;
+   uint64_t desc_addr;
+   struct virtio_net_hdr_mrg_rxbuf *hdr;
+   struct rte_mbuf *m = pkts[i];
+
+   idx = vq->last_used_idx & mask;
+   desc = &descs[idx];
+
+   if (!desc_is_avail(vq, desc))
+   break;
+   rte_smp_rmb();
+
+   desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ sizeof(*desc), VHOST_ACCESS_RW);
+   /*
+* Checking of 'desc_addr' placed outside of 'unlikely' macro
+* to avoid performance issue with some versions of gcc (4.8.4
+* and 5.3.0) which otherwise stores offset on the stack instead
+* of in a register.
+*/
+   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
+   break;
+
+   hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
+   virtio_enqueue_offload(m, &hdr->hdr);
+   vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+   PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+
+   desc_offset = dev->vhost_hlen;
+   desc_avail  = desc->len - dev->vhost_hlen;
+
+   mbuf_avail  = rte_pktmbuf_data_len(m);
+   mbuf_offset = 0;
+   while (mbuf_avail != 0 || m->next != NULL) {
+   /* done with current mbuf, fetch next */
+   if (mbuf_avail == 0) {
+   m = m->next;
+
+   mbuf_offset = 0;
+   mbuf_avail  = rte_pktmbuf_data_len(m);
+   }
+
+   /* done with current desc buf, fetch next */
+   if (desc_avail == 0) {
+   if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
+   /* Room in vring buffer is not enough */
+   goto out;
+   }
+
+   idx = (idx+1) & (vq->size - 1);
+   desc = &descs[idx];
+   if (unlikely(!desc_is_avail(vq, desc)))
+   goto out;
+
+   desc_addr = vhost_iova_to_vva(dev, vq,
+   desc->addr, sizeof(*desc),
+   VHOST_ACCESS_RW);
+   if (unlikely(!desc_addr))
+   goto out;
+
+   desc_offset = 0;
+   desc_avail  = desc->len;
+   }
+
+   cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+   rte_memcpy((void *)((uintptr_t)
+   (desc_addr + desc_offset)),
+   rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+   cpy_len);
+   vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+   PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+cpy_len, 0);
+
+   

[dpdk-dev] [PATCH v4 17/20] vhost: add support for mergeable buffers with packed virtqueues

2018-04-19 Thread Jens Freimann
Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/virtio_net.c | 143 ++
 1 file changed, 116 insertions(+), 27 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 1c7664a7d..e6e75f9a3 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -401,17 +401,53 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 }
 
 static __rte_always_inline int
-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-uint32_t avail_idx, uint32_t *vec_idx,
-struct buf_vector *buf_vec, uint16_t *desc_chain_head,
-uint16_t *desc_chain_len)
+__fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct buf_vector *buf_vec,
+uint32_t *len, uint32_t *vec_id)
+{
+   uint16_t idx = vq->last_avail_idx;
+   struct vring_desc_packed *descs = vq->desc_packed;
+   uint32_t _vec_id = *vec_id;
+
+   if (vq->desc_packed[idx].flags & VRING_DESC_F_INDIRECT) {
+   descs = (struct vring_desc_packed *)(uintptr_t)
+   vhost_iova_to_vva(dev, vq, vq->desc_packed[idx].addr,
+   vq->desc_packed[idx].len,
+   VHOST_ACCESS_RO);
+   if (unlikely(!descs))
+   return -1;
+
+   idx = 0;
+   }
+
+   while (1) {
+   if (unlikely(_vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+   return -1;
+
+   *len += descs[idx].len;
+   buf_vec[_vec_id].buf_addr = descs[idx].addr;
+   buf_vec[_vec_id].buf_len  = descs[idx].len;
+   buf_vec[_vec_id].desc_idx = idx;
+   _vec_id++;
+
+   if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
+   break;
+
+   idx = increase_index(idx, vq->size);
+   }
+   *vec_id = _vec_id;
+
+   return 0;
+}
+
+static __rte_always_inline int
+__fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+struct buf_vector *buf_vec,
+uint32_t *len, uint32_t *vec_id, uint32_t avail_idx)
 {
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-   uint32_t vec_id = *vec_idx;
-   uint32_t len= 0;
struct vring_desc *descs = vq->desc;
-
-   *desc_chain_head = idx;
+   uint32_t _vec_id = *vec_id;
 
if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
descs = (struct vring_desc *)(uintptr_t)
@@ -425,20 +461,51 @@ fill_vec_buf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
}
 
while (1) {
-   if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+   if (unlikely(_vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
return -1;
 
-   len += descs[idx].len;
-   buf_vec[vec_id].buf_addr = descs[idx].addr;
-   buf_vec[vec_id].buf_len  = descs[idx].len;
-   buf_vec[vec_id].desc_idx = idx;
-   vec_id++;
+   *len += descs[idx].len;
+   buf_vec[_vec_id].buf_addr = descs[idx].addr;
+   buf_vec[_vec_id].buf_len  = descs[idx].len;
+   buf_vec[_vec_id].desc_idx = idx;
+   _vec_id++;
 
if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
break;
 
idx = descs[idx].next;
}
+   *vec_id = _vec_id;
+
+   return 0;
+}
+
+static __rte_always_inline int
+fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+uint32_t avail_idx, uint32_t *vec_idx,
+struct buf_vector *buf_vec, uint16_t *desc_chain_head,
+uint16_t *desc_chain_len)
+{
+   uint16_t idx;
+   uint32_t vec_id = *vec_idx;
+   uint32_t len= 0;
+
+   if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
+   idx = vq->last_avail_idx;
+   else
+   idx = vq->avail->ring[avail_idx & (vq->size - 1)];
+
+   *desc_chain_head = idx;
+
+   if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
+   if (__fill_vec_buf_packed(dev, vq,
+   buf_vec, &len, &vec_id))
+   return -1;
+   } else {
+   if (__fill_vec_buf_split(dev, vq,
+   buf_vec, &len, &vec_id, avail_idx))
+   return -1;
+   }
 
*desc_chain_len = len;
*vec_idx = vec_id;
@@ -465,14 +532,16 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, 
struct vhost_virtqueue *vq,
cur_idx  = vq->last_avail_idx;
 
while (size > 0) {
-   if (unlikely(cur_idx == avail_head))
+   if (unlikely(cur_idx == avai

[dpdk-dev] [PATCH v4 18/20] vhost: add event suppression for packed queues

2018-04-19 Thread Jens Freimann
Signed-off-by: Jens Freimann 
---
 lib/librte_vhost/vhost.c  | 15 +++
 lib/librte_vhost/vhost.h  | 63 ---
 lib/librte_vhost/vhost_user.c | 22 +++
 lib/librte_vhost/virtio_net.c | 15 ++-
 4 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index f7989cfbd..d07a8a347 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -23,6 +23,7 @@
 #include "iotlb.h"
 #include "vhost.h"
 #include "vhost_user.h"
+#include "virtio-packed.h"
 
 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
 
@@ -577,10 +578,24 @@ int
 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 {
struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
 
if (!dev)
return -1;
 
+   vq = dev->virtqueue[queue_id];
+   if (!vq->enabled)
+   return 0;
+
+   if (vq_is_packed(dev)) {
+   if (!enable) {
+   vq->driver_event->desc_event_flags |=
+   RING_EVENT_FLAGS_DISABLE;
+   } else
+   vq->driver_event->desc_event_flags |=
+   RING_EVENT_FLAGS_ENABLE;
+   }
+
if (enable)
dev->virtqueue[queue_id]->used->flags &=
~VRING_USED_F_NO_NOTIFY;
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 272d45f54..3e189f22a 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -69,14 +69,31 @@ struct batch_copy_elem {
uint64_t log_addr;
 };
 
+#define RING_EVENT_FLAGS_ENABLE 0x0
+#define RING_EVENT_FLAGS_DISABLE 0x1
+#define RING_EVENT_FLAGS_DESC 0x2
+#define RING_EVENT_FLAGS_MASK 0xFFFC
+#define RING_EVENT_WRAP_MASK 0x8000
+#define RING_EVENT_OFF_MASK 0x7FFF
+struct vring_packed_desc_event {
+   uint16_t desc_event_off_wrap;
+   uint16_t desc_event_flags;
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
 struct vhost_virtqueue {
struct vring_desc   *desc;
struct vring_desc_packed   *desc_packed;
-   struct vring_avail  *avail;
-   struct vring_used   *used;
+   union {
+   struct vring_avail  *avail;
+   struct vring_packed_desc_event *driver_event;
+   };
+   union {
+   struct vring_used   *used;
+   struct vring_packed_desc_event *device_event;
+   };
uint32_tsize;
 
uint16_tlast_avail_idx;
@@ -210,7 +227,6 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_MTU) | \
(1ULL << VIRTIO_F_IOMMU_PLATFORM))
 
-
 struct guest_page {
uint64_t guest_phys_addr;
uint64_t host_phys_addr;
@@ -475,6 +491,11 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, 
uint16_t old)
 static __rte_always_inline void
 vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
+   uint16_t off_wrap, wrap = 0;
+   uint16_t event_flags;
+   uint16_t event_idx = 0;
+   int do_kick = 0;
+
/* Flush used->idx update before we read avail->flags. */
rte_mb();
 
@@ -482,22 +503,44 @@ vhost_vring_call(struct virtio_net *dev, struct 
vhost_virtqueue *vq)
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
uint16_t old = vq->signalled_used;
uint16_t new = vq->last_used_idx;
+   if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) {
+   event_flags = vq->driver_event->desc_event_flags &
+   RING_EVENT_FLAGS_MASK;
+   if (!(event_flags & RING_EVENT_FLAGS_DESC))
+   do_kick = event_flags & RING_EVENT_FLAGS_ENABLE 
? 1 : 0;
+   else {
+   off_wrap = 
vq->driver_event->desc_event_off_wrap;
+   wrap = off_wrap & RING_EVENT_WRAP_MASK;
+   event_idx = off_wrap & RING_EVENT_OFF_MASK;
+   }
+   if (vhost_need_event(event_idx, new, old) &&
+   (vq->callfd >= 0) &&
+   (wrap == vq->used_wrap_counter)) {
+   vq->signalled_used = vq->last_used_idx;
+   do_kick = 1;
+   }
+   } else {
+   event_idx = vhost_used_event(vq);
+   if (vhost_need_event(event_idx, new, old)
+   && (vq->callfd >= 0)) {
+   vq->signalled_used = vq->last_used_idx;
+   do_kick = 1;
+   }
+   }
 
VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event

[dpdk-dev] [PATCH v4 19/20] net/virtio: by default disable packed virtqueues

2018-04-19 Thread Jens Freimann
Disable packed virtqueues for now and make it dependent on a build-time
config option. This can be reverted once we have missing features like
indirect descriptors implemented.

Signed-off-by: Jens Freimann 
---
 config/common_base |  1 +
 drivers/net/virtio/virtio_ethdev.c | 14 +-
 drivers/net/virtio/virtio_ethdev.h |  1 +
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/config/common_base b/config/common_base
index c4236fd1f..538cc520d 100644
--- a/config/common_base
+++ b/config/common_base
@@ -365,6 +365,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
+CONFIG_RTE_LIBRTE_VIRTIO_PQ=n
 
 #
 # Compile virtio device emulation inside virtio PMD driver
diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 30c04aa19..806bde37a 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1233,6 +1233,10 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t 
req_features)
req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
}
 
+#ifdef RTE_LIBRTE_VIRTIO_PQ
+   req_features |= (1ull << VIRTIO_F_RING_PACKED);
+#endif
+
/*
 * Negotiate features: Subset of device feature bits are written back
 * guest feature bits.
@@ -1391,11 +1395,7 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 {
struct virtio_hw *hw = eth_dev->data->dev_private;
 
-   if (hw->use_simple_rx) {
-   PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
-   eth_dev->data->port_id);
-   eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
-   } else if (vtpci_packed_queue(hw)) {
+   if (vtpci_packed_queue(hw)) {
if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
else
@@ -1405,6 +1405,10 @@ set_rxtx_funcs(struct rte_eth_dev *eth_dev)
"virtio: using mergeable buffer Rx path on port %u",
eth_dev->data->port_id);
eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
+   } else if (hw->use_simple_rx) {
+   PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
+   eth_dev->data->port_id);
+   eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
} else {
PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
eth_dev->data->port_id);
diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index cb1399b3b..3aeced4bb 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -36,6 +36,7 @@
 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |  \
 1u << VIRTIO_RING_F_INDIRECT_DESC |\
 1ULL << VIRTIO_F_VERSION_1   | \
+1ULL << VIRTIO_F_RING_PACKED | \
 1ULL << VIRTIO_F_IOMMU_PLATFORM)
 
 #define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES\
-- 
2.14.3



[dpdk-dev] [PATCH v4 20/20] vhost: by default disable packed virtqueues

2018-04-19 Thread Jens Freimann
From: Yuanhan Liu 

Add a build-time config option to enable packed virtqueues.
This config option activates the code to enqueue and dequeue packets to/from a
packed virtqueue.  Add a feature bit for packed virtqueues as defined in
Virtio 1.1 draft.

Signed-off-by: Jens Freimann 
Signed-off-by: Yuanhan Liu 
---
 config/common_base|  1 +
 lib/librte_vhost/socket.c |  5 +
 lib/librte_vhost/vhost.h  |  1 +
 lib/librte_vhost/virtio_net.c | 16 +++-
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/config/common_base b/config/common_base
index 538cc520d..83a8d6e67 100644
--- a/config/common_base
+++ b/config/common_base
@@ -802,6 +802,7 @@ CONFIG_RTE_LIBRTE_PDUMP=y
 CONFIG_RTE_LIBRTE_VHOST=n
 CONFIG_RTE_LIBRTE_VHOST_NUMA=n
 CONFIG_RTE_LIBRTE_VHOST_DEBUG=n
+CONFIG_RTE_LIBRTE_VHOST_PQ=n
 
 #
 # Compile vhost PMD
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 636fc25c6..024fd26b8 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -850,6 +850,11 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
}
 
+#ifdef RTE_LIBRTE_VHOST_PQ
+   vsocket->features |= (1ULL << VIRTIO_F_RING_PACKED);
+   vsocket->features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+#endif
+
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
if (vsocket->reconnect && reconn_tid == 0) {
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 3e189f22a..1728f9f23 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -209,6 +209,7 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
(1ULL << VIRTIO_NET_F_MQ)  | \
(1ULL << VIRTIO_F_VERSION_1)   | \
+   (1ULL << VIRTIO_F_RING_PACKED) | \
(1ULL << VHOST_F_LOG_ALL)  | \
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
(1ULL << VIRTIO_NET_F_GSO) | \
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 523e7d9f6..d670751ea 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -928,12 +928,14 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return 0;
}
 
-   if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-   return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-   else if (vq_is_packed(dev))
-   return vhost_enqueue_burst_packed(dev, queue_id, pkts, count);
-   else
+   if (vq_is_packed(dev)) {
+   if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+   return virtio_dev_merge_rx(dev, queue_id, pkts, count);
+   else
+   return vhost_enqueue_burst_packed(dev, queue_id, pkts, 
count);
+   } else {
return virtio_dev_rx(dev, queue_id, pkts, count);
+   }
 }
 
 static inline bool
@@ -1595,6 +1597,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
goto out_access_unlock;
 
+   if (vq_is_packed(dev))
+   return vhost_dequeue_burst_packed(dev, vq, mbuf_pool, pkts,
+ count);
+
vq->batch_copy_nb_elems = 0;
 
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
-- 
2.14.3



Re: [dpdk-dev] kernel binding of devices + hotplug

2018-04-19 Thread Thomas Monjalon
19/04/2018 08:04, Alejandro Lucero:
> I do not completely understand the discussion, but I think the disagreement
> is due to how some devices interact with DPDK, at least Mellanox ones. I'm
> saying that because we have a DPDK app which starts with no device at all
> (--no-pci) and it relies on device plugging attach/detach for configuring
> and removing ports once devices are bound to VFIO or UIO drivers. Maybe I'm
> wrong, but I think because Mellanox cards do not use VFIO or UIO drivers
> but some specific bound using verbs inside the PMD, leaving all this
> binding to the system does not fit them.

Mellanox uses a bifurcated model for any use.
Others could use a bifurcated model thanks to AF_XDP.
That's why it is more correct to compare "bifurcated model" vs "UIO/VFIO".

> If that is the case, although I agree with leaving the device binding to
> the system, I think it would be fair to contemplate a dual approach for
> legacy reasons, or to leave time for implementing a pseudo system driver
> which Mellanox can use for having same functionality.

I summarize the comparison:
- On one hand, we can configure all the devices only once in DPDK,
but it gives super-powers to the DPDK application.
- On the other hand, we can do a part of the setup at system level
(some kernel binding or flow bifurcation), and we do another part
of the setup in DPDK, splitting/duplicating the setup info in two places.


> On Wed, Apr 18, 2018 at 7:54 PM, Flavio Leitner  wrote:
> > On Wed, Apr 18, 2018 at 11:17:47AM -0700, Stephen Hemminger wrote:
> > > On Wed, 18 Apr 2018 11:11:01 -0300
> > > Flavio Leitner  wrote:
> > > > On Sun, Apr 15, 2018 at 01:48:36AM +, Stephen Hemminger wrote:
> > > > > My vote is to work with udev and not try to replace it.
> > > > >
> > > > > Driverctl works well. Just not for bifurcated driver
> > > >
> > > > I second that.  We also have other system configs to care about like
> > > > kernel parameters and hugepage configuration which I think follow the
> > > > same idea that they are system wide configs and should not be managed
> > > > by DPDK itself.
> > >
> > > Maybe teach driverctl (and udev) to handle bifurcated drivers.
> >
> > I don't know the challenges to teach driverctl to handle bifurcated
> > drivers but I would agree that it should be our first place to look at.
> >
> > > Unfortunately, vendors are very fractured on how network devices are
> > managed.
> >
> > You mean distros? hw vendors? all vendors? :)
> >
> > Perhaps if community focus on something, then they might follow at some
> > point.
> >
> > --
> > Flavio
> >
> 







Re: [dpdk-dev] [PATCH] examples/ip_pipeline: fix buffer not null terminated

2018-04-19 Thread Singh, Jasvinder


> -Original Message-
> From: Pattan, Reshma
> Sent: Wednesday, April 18, 2018 5:58 PM
> To: dev@dpdk.org
> Cc: Singh, Jasvinder ; Pattan, Reshma
> 
> Subject: [PATCH] examples/ip_pipeline: fix buffer not null terminated
> 
> Copying source string of length equal to sizeof(profile->name) will not append
> the NULL in destination.
> 
> Using strlcpy in place of strncpy fixes this issue as strlcpy guarantees NULL
> termination.
> 
> Coverity issue: 272580
> Fixes: 719374345c ("examples/ip_pipeline: add action profile objects")
> CC: jasvinder.si...@intel.com
> 
> Signed-off-by: Reshma Pattan 
> ---
>  examples/ip_pipeline/action.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/examples/ip_pipeline/action.c b/examples/ip_pipeline/action.c
> index 77a04fe19..d2cd7286c 100644
> --- a/examples/ip_pipeline/action.c
> +++ b/examples/ip_pipeline/action.c
> @@ -6,6 +6,8 @@
>  #include 
>  #include 
> 
> +#include 
> +
>  #include "action.h"
>  #include "hash_func.h"
> 
> @@ -345,7 +347,7 @@ table_action_profile_create(const char *name,
>   }
> 
>   /* Node fill in */
> - strncpy(profile->name, name, sizeof(profile->name));
> + strlcpy(profile->name, name, sizeof(profile->name));
>   memcpy(&profile->params, params, sizeof(*params));
>   profile->ap = ap;
> 
> --
> 2.14.3
Reviewed-by: Jasvinder Singh 



Re: [dpdk-dev] [PATCH] examples/ip_pipeline: fix resource leak

2018-04-19 Thread Singh, Jasvinder


> -Original Message-
> From: Pattan, Reshma
> Sent: Wednesday, April 18, 2018 5:58 PM
> To: dev@dpdk.org
> Cc: Singh, Jasvinder ; Pattan, Reshma
> 
> Subject: [PATCH] examples/ip_pipipeline: fix resource leak
> 
> Close tap device fd before returning upon failures.
> 
> Coverity issue: 272576
> Fixes: 2f74ae28e2 ("examples/ip_pipeline: add tap object")
> CC: jasvinder.si...@intel.com
> 
> Signed-off-by: Reshma Pattan 
> ---
>  examples/ip_pipeline/tap.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/examples/ip_pipeline/tap.c b/examples/ip_pipeline/tap.c index
> 5b3403218..a0f60867f 100644
> --- a/examples/ip_pipeline/tap.c
> +++ b/examples/ip_pipeline/tap.c
> @@ -76,14 +76,17 @@ tap_create(const char *name)
>   snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
> 
>   status = ioctl(fd, TUNSETIFF, (void *) &ifr);
> - if (status < 0)
> + if (status < 0) {
> + close(fd);
>   return NULL;
> + }
> 
>   /* Node allocation */
>   tap = calloc(1, sizeof(struct tap));
> - if (tap == NULL)
> + if (tap == NULL) {
> + close(fd);
>   return NULL;
> -
> + }
>   /* Node fill in */
>   strncpy(tap->name, name, sizeof(tap->name));
>   tap->fd = fd;
> --
> 2.14.3
Reviewed-by: Jasvinder Singh 


Re: [dpdk-dev] [PATCH] examples/ip_pipeline: fixes uninitialized scalar variable

2018-04-19 Thread Singh, Jasvinder


> -Original Message-
> From: Pattan, Reshma
> Sent: Wednesday, April 18, 2018 5:58 PM
> To: dev@dpdk.org
> Cc: Singh, Jasvinder ; Pattan, Reshma
> 
> Subject: [PATCH] examples/ip_pipeline: fixes uninitialized scalar variable
> 
> Using uninitialized value p.thread_id when calling kni_create.
> Initialize the kni_params object to 0.
> 
> Coverity issue: 272569
> Fixes: 9a408cc8ac ("examples/ip_pipeline: add KNI object")
> CC: jasvinder.si...@intel.com
> 
> Signed-off-by: Reshma Pattan 
> ---
>  examples/ip_pipeline/cli.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/examples/ip_pipeline/cli.c b/examples/ip_pipeline/cli.c index
> 199a31ff8..575e176c1 100644
> --- a/examples/ip_pipeline/cli.c
> +++ b/examples/ip_pipeline/cli.c
> @@ -651,6 +651,7 @@ cmd_kni(char **tokens,
>   char *name;
>   struct kni *kni;
> 
> + memset(&p, 0, sizeof(p));
>   if ((n_tokens != 6) && (n_tokens != 8)) {
>   snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
>   return;
> --
> 2.14.3

Reviewed-by: Jasvinder Singh 


Re: [dpdk-dev] [PATCH] examples/ip_pipeline: fix buffer not null terminated

2018-04-19 Thread Singh, Jasvinder


> -Original Message-
> From: Pattan, Reshma
> Sent: Wednesday, April 18, 2018 5:58 PM
> To: dev@dpdk.org
> Cc: Singh, Jasvinder ; Pattan, Reshma
> 
> Subject: [PATCH] examples/ip_pipeline: fix buffer not null terminated
> 
> Copying source string of length equal to sizeof(kni->name) will not append the
> NULL to destination string.
> 
> Using strlcpy in place of strncpy fixes this issue as strlcpy guarantees NULL
> termination.
> 
> Coverity issue: 272562
> Fixes: 9a408cc8ac ("examples/ip_pipeline: add KNI object")
> CC: jasvinder.si...@intel.com
> 
> Signed-off-by: Reshma Pattan 
> ---
>  examples/ip_pipeline/kni.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/examples/ip_pipeline/kni.c b/examples/ip_pipeline/kni.c index
> ed5f8942e..7e5ff0543 100644
> --- a/examples/ip_pipeline/kni.c
> +++ b/examples/ip_pipeline/kni.c
> @@ -7,6 +7,7 @@
> 
>  #include 
>  #include 
> +#include 
> 
>  #include "kni.h"
>  #include "mempool.h"
> @@ -153,7 +154,7 @@ kni_create(const char *name, struct kni_params
> *params)
>   return NULL;
> 
>   /* Node fill in */
> - strncpy(kni->name, name, sizeof(kni->name));
> + strlcpy(kni->name, name, sizeof(kni->name));
>   kni->k = k;
> 
>   /* Node add to list */
> --
> 2.14.3

Reviewed-by: Jasvinder Singh 


Re: [dpdk-dev] kernel binding of devices + hotplug

2018-04-19 Thread Bruce Richardson
On Thu, Apr 19, 2018 at 10:24:24AM +0200, Thomas Monjalon wrote:
> 19/04/2018 08:04, Alejandro Lucero:
> > I do not completely understand the discussion, but I think the
> > disagreement is due to how some devices interact with DPDK, at least
> > Mellanox ones. I'm saying that because we have a DPDK app which starts
> > with no device at all (--no-pci) and it relies on device plugging
> > attach/detach for configuring and removing ports once devices are bound
> > to VFIO or UIO drivers. Maybe I'm wrong, but I think because Mellanox
> > cards do not use VFIO or UIO drivers but some specific bound using
> > verbs inside the PMD, leaving all this binding to the system does not
> > fit them.
> 
> Mellanox uses a bifurcated model for any use.  Others could use a
> bifurcated model thanks to AF_XDP.  That's why it is more correct to
> compare "bifurcated model" vs "UIO/VFIO".
> 
> > If that is the case, although I agree with leaving the device binding
> > to the system, I think it would be fair to contemplate a dual approach
> > for legacy reasons, or to leave time for implementing a pseudo system
> > driver which Mellanox can use for having same functionality.
> 
> I summarize the comparison: - On one hand, we can configure all the
> devices only once in DPDK, but it gives super-powers to the DPDK
> application.  - On the other hand, we can do a part of the setup at
> system level (some kernel binding or flow bifurcation), and we do another
> part of the setup in DPDK, splitting/duplicating the setup info in two
> places.
> 
> 
I disagree with this final assessment. If there is duplication, it would
appear more in the former case than in the latter, as the logic for
determining driver binding and ownership would have to be duplicated in
both the kernel and DPDK.  Also, there are plenty of instances where the
kernel is going to use the devices without DPDK, so you can't reduce
duplication by putting functionality into DPDK - the kernel is not going to
remove functionality just because DPDK offers it! Therefore, if minimizing
duplication is important to you, the solution to chose is to use as much
from the kernel (and associated utilities like udev) as possible and not
implement it in DPDK.


[dpdk-dev] [PATCH 1/3] examples/l3fwd: convert to SPDX license tags

2018-04-19 Thread gavin . hu
From: Gavin Hu 

Signed-off-by: Gavin Hu 
---
 examples/l3fwd/l3fwd_common.h  | 35 +++
 examples/l3fwd/l3fwd_em_hlm.h  | 35 +++
 examples/l3fwd/l3fwd_em_hlm_neon.h | 35 +++
 examples/l3fwd/l3fwd_lpm_neon.h| 35 +++
 examples/l3fwd/l3fwd_neon.h| 36 +++-
 5 files changed, 15 insertions(+), 161 deletions(-)

diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
index 7002a43..df3e461 100644
--- a/examples/l3fwd/l3fwd_common.h
+++ b/examples/l3fwd/l3fwd_common.h
@@ -1,35 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2016 Intel Corporation. All rights reserved.
- *   Copyright(c) 2017, Linaro Limited
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Linaro Limited. All rights reserved.
  */
 
 
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index 9d7afe0..1348f15 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -1,35 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2016 Intel Corporation. All rights reserved.
- *   Copyright(c) 2017, Linaro Limited
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018 Linaro Limited. All rights reserved.
  */
 
 #ifndef __L3FWD_EM_HLM_H__
diff --git a/examples/l3fwd/l3fwd_em_hlm_neon.h 
b/examples/l3fwd/l3fwd_em_hlm_neon.h
index dae1acf..e57ba85 100644
--- a/examples/l3fwd/l3fwd_em_hlm_neon.h
+++ b/examples/l3fwd/l3fwd_em_hlm_neon.h
@@ -1,35 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *

[dpdk-dev] [PATCH 2/3] examples/ip_pipeline: convert to SPDX license tags

2018-04-19 Thread gavin . hu
From: Gavin Hu 

Signed-off-by: Gavin Hu 
---
 examples/ip_pipeline/hash_func_arm64.h | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/examples/ip_pipeline/hash_func_arm64.h 
b/examples/ip_pipeline/hash_func_arm64.h
index ae6c0f4..5871c33 100644
--- a/examples/ip_pipeline/hash_func_arm64.h
+++ b/examples/ip_pipeline/hash_func_arm64.h
@@ -1,34 +1,5 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2017 Linaro Limited. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Linaro Limited. All rights reserved.
  */
 #ifndef __HASH_FUNC_ARM64_H__
 #define __HASH_FUNC_ARM64_H__
-- 
1.8.3.1



[dpdk-dev] [PATCH 0/3] convert to SPDX-3 clause license tag

2018-04-19 Thread gavin . hu
From: Gavin Hu 

Convert to the new SPDX-3 clause license tag for the files with Linaro
Copyright only.

Gavin Hu (3):
  examples/l3fwd: convert to SPDX license tags
  examples/ip_pipeline: convert to SPDX license tags
  drivers: convert to SPDX license tags

 drivers/net/i40e/i40e_rxtx_vec_neon.c  | 35 +++--
 examples/ip_pipeline/hash_func_arm64.h | 33 ++-
 examples/l3fwd/l3fwd_common.h  | 35 +++--
 examples/l3fwd/l3fwd_em_hlm.h  | 35 +++--
 examples/l3fwd/l3fwd_em_hlm_neon.h | 35 +++--
 examples/l3fwd/l3fwd_lpm_neon.h| 35 +++--
 examples/l3fwd/l3fwd_neon.h| 36 +++---
 7 files changed, 20 insertions(+), 224 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH 3/3] drivers: convert to SPDX license tags

2018-04-19 Thread gavin . hu
From: Gavin Hu 

Signed-off-by: Gavin Hu 
---
 drivers/net/i40e/i40e_rxtx_vec_neon.c | 35 +++
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c 
b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index e549d1e..e7e5321 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -1,35 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- *   Copyright(c) 2016, Linaro Limited
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2018, Linaro Limited.  All rights reserved.
  */
 
 #include 
-- 
1.8.3.1



Re: [dpdk-dev] [PATCH 0/2] add MRVL MVPP2 PMD to meson

2018-04-19 Thread Tomasz Duszynski
On Wed, Apr 18, 2018 at 04:02:30PM +0100, Bruce Richardson wrote:
> On Fri, Apr 13, 2018 at 05:12:19PM +0100, Bruce Richardson wrote:
> > On Wed, Apr 11, 2018 at 01:45:05PM +0200, Tomasz Duszynski wrote:
> > > This patchseries adds MRVL MVPP2 PMD to meson build system.
> > >
> > > Tomasz Duszynski (2):
> > >   net/mvpp2: rename the version file to standard
> > >   net/mvpp2: add meson build file
> > >
> >
> > The patches look ok to me as far as the meson code is concerned, but I have
> > no way to test compilation etc. It doesn't cause issues with other x86 or
> > arm builds though, so:
> >
> > Series Acked-by: Bruce Richardson 
>
> +Pablo, who is looking at the crypto driver which is similar.
>
> I've just realised at this stage - while looking at something similar with
> the turbo_sw baseband driver - that the use of environmental variables is
> probably going to cause us problems down the line here. In the case of
> cross-compilation, the meson build is going to pull the environment
> variable of the host, and use that, even in cases where there is no
> cross-compile library available.
>
> I think that for cases like this, using a build option is a better
> solution. It explicitly can be set for each independent build, avoiding the
> cross-build issues I refer to, and also prevents us having issues with
> changing the path in the environment and meson not recognising the change
> (environment variables are not tracked for reconfigure, unlike options).
>
> So, would you be ok with changing this to take the MUSDK path from a meson
> option rather than the environment?
>
> /Bruce

I am okay with the proposed change. As for crypto patch I'll prepare v2.

--
- Tomasz Duszyński


Re: [dpdk-dev] [PATCH 0/2] add MRVL MVPP2 PMD to meson

2018-04-19 Thread Bruce Richardson
On Thu, Apr 19, 2018 at 10:55:33AM +0200, Tomasz Duszynski wrote:
> On Wed, Apr 18, 2018 at 04:02:30PM +0100, Bruce Richardson wrote:
> > On Fri, Apr 13, 2018 at 05:12:19PM +0100, Bruce Richardson wrote:
> > > On Wed, Apr 11, 2018 at 01:45:05PM +0200, Tomasz Duszynski wrote:
> > > > This patchseries adds MRVL MVPP2 PMD to meson build system.
> > > >
> > > > Tomasz Duszynski (2):
> > > >   net/mvpp2: rename the version file to standard
> > > >   net/mvpp2: add meson build file
> > > >
> > >
> > > The patches look ok to me as far as the meson code is concerned, but I 
> > > have
> > > no way to test compilation etc. It doesn't cause issues with other x86 or
> > > arm builds though, so:
> > >
> > > Series Acked-by: Bruce Richardson 
> >
> > +Pablo, who is looking at the crypto driver which is similar.
> >
> > I've just realised at this stage - while looking at something similar with
> > the turbo_sw baseband driver - that the use of environmental variables is
> > probably going to cause us problems down the line here. In the case of
> > cross-compilation, the meson build is going to pull the environment
> > variable of the host, and use that, even in cases where there is no
> > cross-compile library available.
> >
> > I think that for cases like this, using a build option is a better
> > solution. It explicitly can be set for each independent build, avoiding the
> > cross-build issues I refer to, and also prevents us having issues with
> > changing the path in the environment and meson not recognising the change
> > (environment variables are not tracked for reconfigure, unlike options).
> >
> > So, would you be ok with changing this to take the MUSDK path from a meson
> > option rather than the environment?
> >
> > /Bruce
> 
> I am okay with the proposed change. As for crypto patch I'll prepare v2.
> 
> --
> - Tomasz Duszyński

Thanks.


Re: [dpdk-dev] [PATCH v4 1/5] lib/ethdev: support for inline IPsec events

2018-04-19 Thread Anoob Joseph

Hi Thomas,

Are these changes fine? Can you review the changes and let me know if 
you have more comments.


Thanks,

Anoob


On 11/04/18 12:10, Anoob Joseph wrote:

Adding support for IPsec events in rte_eth_event framework. In inline
IPsec offload, the per packet protocol defined variables, like ESN,
would be managed by PMD. In such cases, PMD would need IPsec events
to notify application about various conditions like, ESN overflow.

Signed-off-by: Anoob Joseph 
Acked-by: Akhil Goyal 
---
v4:
* Added more details in documentation
* Renamed members of struct rte_eth_event_ipsec_desc for better readability

v3:
* No change

v2:
* Added time expiry & byte expiry IPsec events in the enum

  lib/librte_ether/rte_ethdev.h | 41 +
  1 file changed, 41 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 5e13dca..2b36883 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -2436,6 +2436,46 @@ int
  rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t 
free_cnt);
  
  /**

+ * Subtypes for IPsec offload event(@ref RTE_ETH_EVENT_IPSEC) raised by
+ * eth device.
+ */
+enum rte_eth_event_ipsec_subtype {
+   RTE_ETH_EVENT_IPSEC_UNKNOWN = 0,
+   /**< Unknown event type */
+   RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW,
+   /**< Sequence number overflow */
+   RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY,
+   /**< Soft time expiry of SA */
+   RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY,
+   /**< Soft byte expiry of SA */
+   RTE_ETH_EVENT_IPSEC_MAX
+   /**< Max value of this enum */
+};
+
+/**
+ * Descriptor for @ref RTE_ETH_EVENT_IPSEC event. Used by eth dev to send extra
+ * information of the IPsec offload event.
+ */
+struct rte_eth_event_ipsec_desc {
+   enum rte_eth_event_ipsec_subtype subtype;
+   /**< Type of RTE_ETH_EVENT_IPSEC_* event */
+   uint64_t metadata;
+   /**< Event specific metadata
+*
+* For the following events, *userdata* registered
+* with the *rte_security_session* would be returned
+* as metadata,
+*
+* - @ref RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW
+* - @ref RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY
+* - @ref RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY
+*
+* @see struct rte_security_session_conf
+*
+*/
+};
+
+/**
   * The eth device event type for interrupt, and maybe others in the future.
   */
  enum rte_eth_event_type {
@@ -2446,6 +2486,7 @@ enum rte_eth_event_type {
RTE_ETH_EVENT_INTR_RESET,
/**< reset interrupt event, sent to VF on PF reset */
RTE_ETH_EVENT_VF_MBOX,  /**< message from the VF received by PF */
+   RTE_ETH_EVENT_IPSEC,/**< IPsec offload related event */
RTE_ETH_EVENT_MACSEC,   /**< MACsec offload related event */
RTE_ETH_EVENT_INTR_RMV, /**< device removal event */
RTE_ETH_EVENT_NEW,  /**< port is probed */




Re: [dpdk-dev] [PATCH 1/2] eal: add macro to mark variable mostly read only

2018-04-19 Thread Pavan Nikhilesh
On Wed, Apr 18, 2018 at 07:03:06PM +0100, Ferruh Yigit wrote:
> On 4/18/2018 6:55 PM, Pavan Nikhilesh wrote:
> > On Wed, Apr 18, 2018 at 06:43:11PM +0100, Ferruh Yigit wrote:
> >> On 4/18/2018 4:30 PM, Pavan Nikhilesh wrote:
> >>> Add macro to mark a variable to be mostly read only and place it in a
> >>> separate section.
> >>>
> >>> Signed-off-by: Pavan Nikhilesh 
> >>> ---
> >>>
> >>>  Group together mostly read only data to avoid cacheline bouncing, also
> >>>  useful for auditing purposes.
> >>>
> >>>  lib/librte_eal/common/include/rte_common.h | 5 +
> >>>  1 file changed, 5 insertions(+)
> >>>
> >>> diff --git a/lib/librte_eal/common/include/rte_common.h 
> >>> b/lib/librte_eal/common/include/rte_common.h
> >>> index 6c5bc5a76..f2ff2e9e6 100644
> >>> --- a/lib/librte_eal/common/include/rte_common.h
> >>> +++ b/lib/librte_eal/common/include/rte_common.h
> >>> @@ -114,6 +114,11 @@ static void __attribute__((constructor(prio), used)) 
> >>> func(void)
> >>>   */
> >>>  #define __rte_noinline  __attribute__((noinline))
> >>>
> >>> +/**
> >>> + * Mark a variable to be mostly read only and place it in a separate 
> >>> section.
> >>> + */
> >>> +#define __rte_read_mostly __attribute__((__section__(".read_mostly")))
> >>
> >
> > Hi Ferruh,
> >
> >> Hi Pavan,
> >>
> >> Is the section ".read_mostly" treated specially [1] or is this just for 
> >> grouping
> >> symbols together (to reduce cacheline bouncing)?
> >
> > The section .read_mostly is not treated specially it's just for grouping
> > symbols.
>
> I have encountered a blog post claiming this is not working:
>
> "
> The problem with the above approach is that once all the __read_mostly 
> variables
> are grouped into one section, the remaining "non-read-mostly" variables end-up
> together too. This increases the chances that two frequently used elements (in
> the "non-read-mostly" region) will end-up competing for the same position (or
> cache-line, the basic fixed-sized block for memory<-->cache transfers) in the
> cache. Thus frequent accesses will cause excessive cache thrashing on that
> particular cache-line thereby degrading the overall system performance.
> "
>
> https://thecodeartist.blogspot.com/2011/12/why-readmostly-does-not-work-as-it.html
>

The author is concerned about processors with less cache set-associativity,
almost all modern processors have >= 16 way set associativity. And the above
issue can happen even now when two frequently written global variables are
placed next to each other.

Currently, we don't have much control over how the global variables are
arranged and a single addition/deletion to the global variables causes change
in alignment and in some cases minor performance regression.
Tagging them as __read_mostly we can easily identify the alignment changes
across builds by comparing map files global variable section.

I have verified the patch-set on arm64 (16-way set-associative) and didn't
notice any performance regression.
Did you have a chance to verify if there is any performance regression?

> >
> >>
> >> [1]
> >> If this is special section, can you please point counter part in the 
> >> kernel?
> >
> > The kernel has something similar[1] but they have a custom linker script to
> > arrange symbols.
> >
> > [1] 
> > https://github.com/torvalds/linux/blob/a27fc14219f2e3c4a46ba9177b04d9b52c875532/arch/x86/include/asm/cache.h#L11
> > kernel commit id 54cb27a71f51d304342c79e62fd7667f2171062b
> >
> >>
> >>
> >>> +
> >>>  /*** Macros for pointer arithmetic /
> >>>
> >>>  /**
> >>> --
> >>> 2.17.0
> >>>
> >>
>


Re: [dpdk-dev] [dpdk-web] [PATCH v2] update stable releases roadmap

2018-04-19 Thread Kevin Traynor
On 04/18/2018 02:28 PM, Thomas Monjalon wrote:
> 18/04/2018 14:28, Ferruh Yigit:
>> On 4/18/2018 10:14 AM, Thomas Monjalon wrote:
>>> 18/04/2018 11:05, Ferruh Yigit:
 On 4/11/2018 12:28 AM, Thomas Monjalon wrote:
> - Typically a new stable release version follows a mainline release
> - by 1-2 weeks, depending on the test results.
> + The first stable release (.1) of a branch should follow
> + its mainline release (.0) by at least two months,
> + after the first release candidate (-rc1) of the next branch.

 Hi Thomas,

 What this change suggest? To be able to backport patches from rc1?
>>>
>>> Yes, it is the proposal we discussed earlier.
>>> We can wait one week after RC1 to get some validation confirmation.
>>> Do you agree?
>>
>> This has been discussed in tech-board, what I remember the decision was to 
>> wait
>> the release to backport patches into stable tree.
> 

Any minutes? I couldn't find them

> It was not so clear to me.
> I thought post-rc1 was acceptable. The idea is to speed-up stable releases
> pace, especially first release of a series.
> 
> 

I think timing of stable releases and bugfix backports to the stable
branch are two separate items.

I do think that bugfix backports to stable should happen on a regular
basis (e.g. every 2 weeks). Otherwise we are back to the situation where
if there's a bugfix after a DPDK release, a user like (surprise,
surprise) OVS may not be able to use that DPDK version for ~3 months.

Someone who wants to get the latest bugfixes can just take the latest on
the stable branch and importantly, can have confidence that the
community has officially accepted those patches. If someone requires
stable to be validated, then they have to wait until the release.

Kevin.





[dpdk-dev] [PATCH] net/mvpp2: get MUSDK library path from a meson option

2018-04-19 Thread Tomasz Duszynski
Since not cluttering environment and keeping all relevant
settings local to the cross build is reasonable get
MUSDK library installation path from a meson option.

Signed-off-by: Tomasz Duszynski 
---
 drivers/net/mvpp2/meson.build | 2 +-
 meson_options.txt | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mvpp2/meson.build b/drivers/net/mvpp2/meson.build
index 6ea13ee..e139889 100644
--- a/drivers/net/mvpp2/meson.build
+++ b/drivers/net/mvpp2/meson.build
@@ -3,7 +3,7 @@
 # Copyright(c) 2018 Semihalf.
 # All rights reserved.

-path = run_command('printenv', 'LIBMUSDK_PATH').stdout().strip()
+path = get_option('lib_musdk_dir')
 lib_dir = path + '/lib'
 inc_dir = path + '/include'

diff --git a/meson_options.txt b/meson_options.txt
index 1a674aa..c843278 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -8,6 +8,8 @@ option('include_subdir_arch', type: 'string', value: '',
description: 'subdirectory where to install arch-dependent headers')
 option('kernel_dir', type: 'string', value: '',
description: 'path to the kernel for building kernel modules')
+option('lib_musdk_dir', type: 'string', value: '',
+   description: 'path to the MUSDK library installation directory')
 option('machine', type: 'string', value: 'native',
description: 'set the target machine type')
 option('max_lcores', type: 'string', value: '128',
--
2.7.4



Re: [dpdk-dev] kernel binding of devices + hotplug

2018-04-19 Thread Thomas Monjalon
19/04/2018 10:40, Bruce Richardson:
> On Thu, Apr 19, 2018 at 10:24:24AM +0200, Thomas Monjalon wrote:
> > 19/04/2018 08:04, Alejandro Lucero:
> > > I do not completely understand the discussion, but I think the
> > > disagreement is due to how some devices interact with DPDK, at least
> > > Mellanox ones. I'm saying that because we have a DPDK app which starts
> > > with no device at all (--no-pci) and it relies on device plugging
> > > attach/detach for configuring and removing ports once devices are bound
> > > to VFIO or UIO drivers. Maybe I'm wrong, but I think because Mellanox
> > > cards do not use VFIO or UIO drivers but some specific bound using
> > > verbs inside the PMD, leaving all this binding to the system does not
> > > fit them.
> > 
> > Mellanox uses a bifurcated model for any use.  Others could use a
> > bifurcated model thanks to AF_XDP.  That's why it is more correct to
> > compare "bifurcated model" vs "UIO/VFIO".
> > 
> > > If that is the case, although I agree with leaving the device binding
> > > to the system, I think it would be fair to contemplate a dual approach
> > > for legacy reasons, or to leave time for implementing a pseudo system
> > > driver which Mellanox can use for having same functionality.
> > 
> > I summarize the comparison: - On one hand, we can configure all the
> > devices only once in DPDK, but it gives super-powers to the DPDK
> > application.  - On the other hand, we can do a part of the setup at
> > system level (some kernel binding or flow bifurcation), and we do another
> > part of the setup in DPDK, splitting/duplicating the setup info in two
> > places.
> > 
> I disagree with this final assessment. If there is duplication, it would
> appear more in the former case than in the latter, as the logic for
> determining driver binding and ownership would have to be duplicated in
> both the kernel and DPDK.  Also, there are plenty of instances where the
> kernel is going to use the devices without DPDK, so you can't reduce
> duplication by putting functionality into DPDK - the kernel is not going to
> remove functionality just because DPDK offers it! Therefore, if minimizing
> duplication is important to you, the solution to chose is to use as much
> from the kernel (and associated utilities like udev) as possible and not
> implement it in DPDK.

In any case, we need to choose and configure DPDK devices in DPDK
application, right?
If implementing all DPDK setup in DPDK, the only thing to do in system
config, is to NOT use the device (set IP, etc).
So preparing the devices for DPDK (or other userpace app) as part of the
system setup is an addition.

Note: I don't know which decision is better, I am just trying to think
about what the solutions are.




Re: [dpdk-dev] [PATCH v4 01/16] ethdev: add error types to flow API

2018-04-19 Thread Adrien Mazarguil
On Wed, Apr 18, 2018 at 10:24:10AM +0100, Ferruh Yigit wrote:
> On 4/18/2018 9:41 AM, Adrien Mazarguil wrote:
> > On Tue, Apr 17, 2018 at 08:37:31PM +0100, Ferruh Yigit wrote:
> >> On 4/16/2018 5:22 PM, Adrien Mazarguil wrote:
> >>> These enable more precise reporting of objects responsible for errors.
> >>>
> >>> This breaks ABI compatibility for the following public functions:
> >>>
> >>> - rte_flow_create()
> >>> - rte_flow_destroy()
> >>> - rte_flow_error_set()
> >>> - rte_flow_flush()
> >>> - rte_flow_isolate()
> >>> - rte_flow_query()
> >>> - rte_flow_validate()
> >>
> >> Is there a deprecation notice for this API break?
> > 
> > A notice covering the main changes in this series (most patches have an ABI
> > impact) was sent but not included [1]. This particular patch rides on the
> > announced ABI breakage in order to add a relatively minor feature while
> > there.
> 
> My take from "announced ABI breakage" is the deprecation notice get three acks
> and merged into release, so it seems there is no deprecation notice and
> according process first deprecation notice should go in this release.

True, so just describe how ABI impact is no worse than what was announced
(and included) [3] and most of them are actually opportune changes to
improve consistency and documentation since ABI would be broken for this
release regardless. Regarding individual patches:

- 01/16 "ethdev: add error types to flow API"
  => New error types are added in the middle of an existing enum.

- 02/16 "ethdev: clarify flow API pattern items and actions"
  => No impact.

- 03/16 "doc: remove flow API migration section"
  => No impact.

- 04/16 "ethdev: remove DUP action from flow API"
  => An action that no PMD supports is removed from an existing enum.

- 05/16 "ethdev: alter behavior of flow API actions"
  => A documentation change in how actions are processed logically
 breaks ABI in the case of repeated actions (currently a corner
 case). Most PMDs do not implement the original behavior correctly
 anyway (see commit log).

- 06/16 "ethdev: remove C99 flexible arrays from flow API"
  => ABI impact is primarily triggered by the RSS action change (already
 covered [3]). The RAW pattern item is also modified for consistency.
 On the API side, except when allocating these structures, there is no
 difference in usage (i.e. array[index] => pointer[index]).

- 07/16 "ethdev: flatten RSS configuration in flow API"
  => Already covered [3].

- 08/16 "ethdev: add hash function to RSS flow API action"
  => Already covered [3].

- 09/16 "ethdev: add encap level to RSS flow API action"
  => Already covered [3].

- 10/16 "ethdev: refine TPID handling in flow API"
  => No PMD supports the poorly defined TPID matching, applications couldn't
 possibly rely on it.

- 11/16 "ethdev: limit default VLAN TCI mask in flow API"
  => No ABI breakage, but a different behavior for applications that rely on
 the default mask. It doesn't look like any PMD supports PCP/DEI
 matching so again applications could not rely on it (they still can do
 it by providing a specific mask).

- 12/16 "ethdev: add transfer attribute to flow API"
  => Minor ABI impact (read: logical) due to the addition of a bit in an
 existing bit-field. No practical impact on applications.

- 13/16 "ethdev: update behavior of VF/PF in flow API"
  => Documentation (API) change. The "transfer" bit must now be set in order to
 use these actions with PMDs that support them.

- 14/16 "ethdev: rename physical port item in flow API"
  => API change for a pattern item supported by no PMD.

- 15/16 "ethdev: add physical port action to flow API"
  => New action added in the middle of an existing enum.

- 16/16 "ethdev: add port ID item and action to flow API"
  => New item/action added in the middle of existing enums.

> Hi Thomas,
> 
> Any comment on issue?
> 
> > 
> > This ABI change was implicitly needed by upcoming work for 18.05 (Xueming's
> > RSS stuff [2][3], Declan's TEP [4], the rest is summarized by a RFC [5]) due
> > to the necessary changes of behavior in flow rules.
> > 
> > Note that Xueming's deprecation notice [3] alone would have triggered such
> > an ABI change because struct rte_flow_action_rss wouldn't have been binary 
> > compatible if struct rte_eth_rss_conf was updated. This change would have
> > propagated back to rte_flow functions manipulating them.
> 
> To be honest I lost track of Xueming's patches, because of split/merge of
> patchset, multiple set with multiple versions out.
> 
> Is it possible to document the dependency graph including your set?

I hopefully didn't miss any:

 Bunch of flow API-related fixes (v5) [6]
 |
 `-- Flow API overhaul for switch offloads (v4) [7]
 |
 +-- additions to support tunnel encap/decap (v4) [8]
 |
 +-- introduce new tunnel types (v5) [9]
 |   |
 |   `-- mlx5 Rx tunnel offloading (v4) [10]
 |
 +-- rte_flow extension for vSwitch acceleration (v3.2) [11]
 

Re: [dpdk-dev] [PATCH 0/3] convert to SPDX-3 clause license tag

2018-04-19 Thread Hemant Agrawal

Series-Acked-by: Hemant Agrawal 

On 4/19/2018 2:22 PM, gavin...@linaro.org wrote:

From: Gavin Hu 

Convert to the new SPDX-3 clause license tag for the files with Linaro
Copyright only.

Gavin Hu (3):
   examples/l3fwd: convert to SPDX license tags
    examples/ip_pipeline: convert to SPDX license tags
   drivers: convert to SPDX license tags

  drivers/net/i40e/i40e_rxtx_vec_neon.c  | 35 +++--
  examples/ip_pipeline/hash_func_arm64.h | 33 ++-
  examples/l3fwd/l3fwd_common.h  | 35 +++--
  examples/l3fwd/l3fwd_em_hlm.h  | 35 +++--
  examples/l3fwd/l3fwd_em_hlm_neon.h | 35 +++--
  examples/l3fwd/l3fwd_lpm_neon.h| 35 +++--
  examples/l3fwd/l3fwd_neon.h| 36 +++---
  7 files changed, 20 insertions(+), 224 deletions(-)



[dpdk-dev] [PATCH v6 02/11] net/mlx4: fix ignored RSS hash types

2018-04-19 Thread Adrien Mazarguil
When an unsupported hash type is part of a RSS configuration structure, it
is silently ignored instead of triggering an error. This may lead
applications to assume that such types are accepted, while they are in fact
not part of the resulting flow rules.

Fixes: 078b8b452e6b ("net/mlx4: add RSS flow rule action support")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Shahaf Shuler 
---
 drivers/net/mlx4/mlx4_flow.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 2b4bf7094..67fd568bc 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -706,6 +706,7 @@ mlx4_flow_prepare(struct priv *priv,
const struct rte_flow_action_queue *queue;
const struct rte_flow_action_rss *rss;
const struct rte_eth_rss_conf *rss_conf;
+   uint64_t fields;
unsigned int i;
 
case RTE_FLOW_ACTION_TYPE_VOID:
@@ -780,10 +781,15 @@ mlx4_flow_prepare(struct priv *priv,
" of the context size";
goto exit_action_not_supported;
}
+   rte_errno = 0;
+   fields = mlx4_conv_rss_hf(priv, rss_conf->rss_hf);
+   if (fields == (uint64_t)-1 && rte_errno) {
+   msg = "unsupported RSS hash type requested";
+   goto exit_action_not_supported;
+   }
flow->rss = mlx4_rss_get
-   (priv,
-mlx4_conv_rss_hf(priv, rss_conf->rss_hf),
-rss_conf->rss_key, rss->num, rss->queue);
+   (priv, fields, rss_conf->rss_key, rss->num,
+rss->queue);
if (!flow->rss) {
msg = "either invalid parameters or not enough"
" resources for additional multi-queue"
-- 
2.11.0


[dpdk-dev] [PATCH v6 01/11] net/mlx4: fix RSS resource leak in case of error

2018-04-19 Thread Adrien Mazarguil
When memory cannot be allocated for a flow rule, its RSS context reference
is not dropped.

Fixes: 078b8b452e6b ("net/mlx4: add RSS flow rule action support")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Shahaf Shuler 
---
 drivers/net/mlx4/mlx4_flow.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index f3063ee8a..2b4bf7094 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -820,11 +820,14 @@ mlx4_flow_prepare(struct priv *priv,
},
};
 
-   if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
+   if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
+   if (temp.rss)
+   mlx4_rss_put(temp.rss);
return rte_flow_error_set
(error, -rte_errno,
 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 "flow rule handle allocation failure");
+   }
/* Most fields will be updated by second pass. */
*flow = (struct rte_flow){
.ibv_attr = temp.ibv_attr,
-- 
2.11.0


[dpdk-dev] [PATCH v6 00/11] Bunch of flow API-related fixes

2018-04-19 Thread Adrien Mazarguil
This series contains several fixes for rte_flow and its implementation in
PMDs and testpmd. Upcoming work on the flow API depends on it.

v6 changes:

- No change, rebased series and updated/fixed commit messages.

v5 changes:

- No change, rebased series to address conflicts.

v4 changes:

- Rebased again.
- The reliance on rte_eth_dev_rss_hash_conf_get() was removed from patch #7,
  see updated patch for details.

v3 changes:

- Rebased series.
- Dropped unnecessary "net/sfc: fix endian conversions in flow API".
- Dropped "ethdev: fix ABI version in meson build", handled by prior commit
  d9736a248785 ("ethdev: fix library version in meson build").

v2 changes:

- mlx5 fix (patch #3).
- bnxt fix (patch #4).
- sfc fix (patch #6).
- Missing include (patch #13).

Adrien Mazarguil (11):
  net/mlx4: fix RSS resource leak in case of error
  net/mlx4: fix ignored RSS hash types
  net/mlx5: fix RSS flow action bounds check
  net/bnxt: fix matching of flow API item masks
  app/testpmd: fix flow completion for RSS queues
  app/testpmd: fix lack of flow action configuration
  app/testpmd: fix RSS flow action configuration
  app/testpmd: fix missing RSS fields in flow action
  app/testpmd: fix missing boolean values in flow command
  ethdev: fix shallow copy of flow API RSS action
  ethdev: fix missing include in flow API

 app/test-pmd/cmdline.c  |   2 +
 app/test-pmd/cmdline_flow.c | 252 ---
 app/test-pmd/config.c   | 160 +-
 app/test-pmd/testpmd.h  |  13 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   8 +
 drivers/net/bnxt/bnxt_filter.c  |  14 +-
 drivers/net/mlx4/mlx4_flow.c|  17 +-
 drivers/net/mlx5/mlx5_flow.c|   9 +
 lib/librte_ether/rte_flow.c | 145 +
 lib/librte_ether/rte_flow.h |   2 +
 10 files changed, 494 insertions(+), 128 deletions(-)

-- 
2.11.0


[dpdk-dev] [PATCH v6 03/11] net/mlx5: fix RSS flow action bounds check

2018-04-19 Thread Adrien Mazarguil
The number of queues provided by the application is not checked against
parser's supported maximum.

Fixes: 3d821d6fea40 ("net/mlx5: support RSS action flow rule")
Cc: sta...@dpdk.org
Cc: Nelio Laranjeiro 

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_flow.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 7ef68de49..1ca413e32 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -16,6 +16,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include 
 #include 
 #include 
 #include 
@@ -713,6 +714,14 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
return -rte_errno;
}
}
+   if (rss->num > RTE_DIM(parser->queues)) {
+   rte_flow_error_set(error, EINVAL,
+  RTE_FLOW_ERROR_TYPE_ACTION,
+  actions,
+  "too many queues for RSS"
+  " context");
+   return -rte_errno;
+   }
for (n = 0; n < rss->num; ++n) {
if (rss->queue[n] >= priv->rxqs_n) {
rte_flow_error_set(error, EINVAL,
-- 
2.11.0


[dpdk-dev] [PATCH v6 05/11] app/testpmd: fix flow completion for RSS queues

2018-04-19 Thread Adrien Mazarguil
The lack of a working completion for RSS queues was overlooked during
development; until now only "end" was displayed as a valid token.

Fixes: 05d34c6e9d2c ("app/testpmd: add queue actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 59f3b3b57..4640f18f7 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -2663,17 +2663,15 @@ static int
 comp_vc_action_rss_queue(struct context *ctx, const struct token *token,
 unsigned int ent, char *buf, unsigned int size)
 {
-   static const char *const str[] = { "", "end", NULL };
-   unsigned int i;
-
(void)ctx;
(void)token;
-   for (i = 0; str[i] != NULL; ++i)
-   if (buf && i == ent)
-   return snprintf(buf, size, "%s", str[i]);
-   if (buf)
-   return -1;
-   return i;
+   if (!buf)
+   return nb_rxq + 1;
+   if (ent < nb_rxq)
+   return snprintf(buf, size, "%u", ent);
+   if (ent == nb_rxq)
+   return snprintf(buf, size, "end");
+   return -1;
 }
 
 /** Internal context. */
-- 
2.11.0


[dpdk-dev] [PATCH v6 04/11] net/bnxt: fix matching of flow API item masks

2018-04-19 Thread Adrien Mazarguil
Some values are interpreted without endian conversion and/or without
taking the proper mask into account.

Fixes: 5ef3b79fdfe6 ("net/bnxt: support flow filter ops")
Cc: sta...@dpdk.org
Cc: Ajit Khaparde 

Signed-off-by: Adrien Mazarguil 
---
 drivers/net/bnxt/bnxt_filter.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c
index 9351460c2..fdd94bf02 100644
--- a/drivers/net/bnxt/bnxt_filter.c
+++ b/drivers/net/bnxt/bnxt_filter.c
@@ -5,6 +5,7 @@
 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -354,7 +355,8 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp,
}
 
/* Mask is not allowed. Only exact matches are */
-   if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) {
+   if (eth_mask->type &&
+   eth_mask->type != RTE_BE16(0xffff)) {
rte_flow_error_set(error, EINVAL,
   RTE_FLOW_ERROR_TYPE_ITEM,
   item,
@@ -380,7 +382,7 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp,
   *  RTE_LOG(ERR, PMD, "Handle this condition\n");
   * }
   */
-   if (eth_spec->type) {
+   if (eth_mask->type) {
filter->ethertype =
rte_be_to_cpu_16(eth_spec->type);
en |= use_ntuple ?
@@ -392,13 +394,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp,
case RTE_FLOW_ITEM_TYPE_VLAN:
vlan_spec = item->spec;
vlan_mask = item->mask;
-   if (vlan_mask->tci & 0xFFFF && !vlan_mask->tpid) {
+   if (vlan_mask->tci &&
+   vlan_mask->tci == RTE_BE16(0x0fff) &&
+   !vlan_mask->tpid) {
/* Only the VLAN ID can be matched. */
filter->l2_ovlan =
rte_be_to_cpu_16(vlan_spec->tci &
-0xFFF);
+RTE_BE16(0x0fff));
en |= EM_FLOW_ALLOC_INPUT_EN_OVLAN_VID;
-   } else {
+   } else if (vlan_mask->tci || vlan_mask->tpid) {
rte_flow_error_set(error, EINVAL,
   RTE_FLOW_ERROR_TYPE_ITEM,
   item,
-- 
2.11.0


[dpdk-dev] [PATCH v6 06/11] app/testpmd: fix lack of flow action configuration

2018-04-19 Thread Adrien Mazarguil
Configuration structure is not optional with flow rule actions that expect
one; this pointer is not supposed to be NULL and PMDs should not have to
verify it.

Like pattern item spec/last/mask fields, it is currently set when at least
one configuration parameter is provided on the command line. This patch
sets it as soon as an action is created instead.

Fixes: 7a91969ad35e ("app/testpmd: add various actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 4640f18f7..a0e06db36 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -1909,6 +1909,7 @@ parse_vc(struct context *ctx, const struct token *token,
return -1;
*action = (struct rte_flow_action){
.type = priv->type,
+   .conf = data_size ? data : NULL,
};
++out->args.vc.actions_n;
ctx->object = action;
@@ -1989,7 +1990,6 @@ parse_vc_conf(struct context *ctx, const struct token 
*token,
  void *buf, unsigned int size)
 {
struct buffer *out = buf;
-   struct rte_flow_action *action;
 
(void)size;
/* Token name must match. */
@@ -1998,14 +1998,9 @@ parse_vc_conf(struct context *ctx, const struct token 
*token,
/* Nothing else to do if there is no buffer. */
if (!out)
return len;
-   if (!out->args.vc.actions_n)
-   return -1;
-   action = &out->args.vc.actions[out->args.vc.actions_n - 1];
/* Point to selected object. */
ctx->object = out->args.vc.data;
ctx->objmask = NULL;
-   /* Update configuration pointer. */
-   action->conf = ctx->object;
return len;
 }
 
-- 
2.11.0


[dpdk-dev] [PATCH v6 08/11] app/testpmd: fix missing RSS fields in flow action

2018-04-19 Thread Adrien Mazarguil
Users cannot override the default RSS settings when entering a RSS action,
only a list of queues can be provided.

This patch enables them to set a RSS hash key and types for a flow rule.

Fixes: 05d34c6e9d2c ("app/testpmd: add queue actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
---
 app/test-pmd/cmdline_flow.c | 133 ++-
 app/test-pmd/config.c   |  20 ++--
 app/test-pmd/testpmd.h  |  13 +++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   8 ++
 4 files changed, 163 insertions(+), 11 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index d37c5f39f..fcd76b56e 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -167,6 +167,10 @@ enum index {
ACTION_DUP,
ACTION_DUP_INDEX,
ACTION_RSS,
+   ACTION_RSS_TYPES,
+   ACTION_RSS_TYPE,
+   ACTION_RSS_KEY,
+   ACTION_RSS_KEY_LEN,
ACTION_RSS_QUEUES,
ACTION_RSS_QUEUE,
ACTION_PF,
@@ -223,6 +227,9 @@ struct context {
 struct arg {
uint32_t hton:1; /**< Use network byte ordering. */
uint32_t sign:1; /**< Value is signed. */
+   uint32_t bounded:1; /**< Value is bounded. */
+   uintmax_t min; /**< Minimum value if bounded. */
+   uintmax_t max; /**< Maximum value if bounded. */
uint32_t offset; /**< Relative offset from ctx->object. */
uint32_t size; /**< Field size. */
const uint8_t *mask; /**< Bit-mask to use instead of offset/size. */
@@ -329,6 +336,16 @@ struct token {
.size = (s), \
})
 
+/** Same as ARGS_ENTRY_ARB() with bounded values. */
+#define ARGS_ENTRY_ARB_BOUNDED(o, s, i, a) \
+   (&(const struct arg){ \
+   .bounded = 1, \
+   .min = (i), \
+   .max = (a), \
+   .offset = (o), \
+   .size = (s), \
+   })
+
 /** Same as ARGS_ENTRY() using network byte ordering. */
 #define ARGS_ENTRY_HTON(s, f) \
(&(const struct arg){ \
@@ -635,6 +652,9 @@ static const enum index action_dup[] = {
 };
 
 static const enum index action_rss[] = {
+   ACTION_RSS_TYPES,
+   ACTION_RSS_KEY,
+   ACTION_RSS_KEY_LEN,
ACTION_RSS_QUEUES,
ACTION_NEXT,
ZERO,
@@ -666,6 +686,9 @@ static int parse_vc_conf(struct context *, const struct 
token *,
 static int parse_vc_action_rss(struct context *, const struct token *,
   const char *, unsigned int, void *,
   unsigned int);
+static int parse_vc_action_rss_type(struct context *, const struct token *,
+   const char *, unsigned int, void *,
+   unsigned int);
 static int parse_vc_action_rss_queue(struct context *, const struct token *,
 const char *, unsigned int, void *,
 unsigned int);
@@ -721,6 +744,8 @@ static int comp_port(struct context *, const struct token *,
 unsigned int, char *, unsigned int);
 static int comp_rule_id(struct context *, const struct token *,
unsigned int, char *, unsigned int);
+static int comp_vc_action_rss_type(struct context *, const struct token *,
+  unsigned int, char *, unsigned int);
 static int comp_vc_action_rss_queue(struct context *, const struct token *,
unsigned int, char *, unsigned int);
 
@@ -1593,6 +1618,43 @@ static const struct token token_list[] = {
.next = NEXT(action_rss),
.call = parse_vc_action_rss,
},
+   [ACTION_RSS_TYPES] = {
+   .name = "types",
+   .help = "RSS hash types",
+   .next = NEXT(action_rss, NEXT_ENTRY(ACTION_RSS_TYPE)),
+   },
+   [ACTION_RSS_TYPE] = {
+   .name = "{type}",
+   .help = "RSS hash type",
+   .call = parse_vc_action_rss_type,
+   .comp = comp_vc_action_rss_type,
+   },
+   [ACTION_RSS_KEY] = {
+   .name = "key",
+   .help = "RSS hash key",
+   .next = NEXT(action_rss, NEXT_ENTRY(STRING)),
+   .args = ARGS(ARGS_ENTRY_ARB
+(((uintptr_t)&((union action_rss_data *)0)->
+  s.rss_conf.rss_key_len),
+ sizeof(((struct rte_eth_rss_conf *)0)->
+rss_key_len)),
+ARGS_ENTRY_ARB
+(((uintptr_t)((union action_rss_data *)0)->
+  s.rss_key),
+ RSS_HASH_KEY_LENGTH)),
+   },
+   [ACTION_RSS_KEY_LEN] = {
+   .name = "key_len",
+   .help = "RSS hash key length in bytes

[dpdk-dev] [PATCH v6 07/11] app/testpmd: fix RSS flow action configuration

2018-04-19 Thread Adrien Mazarguil
Except for a list of queues, RSS configuration (hash key and fields) cannot
be specified from the flow command line and testpmd does not provide safe
defaults either.

In order to validate their implementation with testpmd, PMDs had to
interpret its NULL RSS configuration parameters somehow, however this has
never been valid to begin with.

This patch makes testpmd always provide default values.

The list of RSS types to use is exclusively taken from the global "rss_hf"
variable, itself configured through the "port config all rss" command or
--rss-ip/--rss-udp command-line options.

Fixes: 05d34c6e9d2c ("app/testpmd: add queue actions to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
Cc: Xueming Li 

---

v4 changes:

Removed reliance on rte_eth_dev_rss_hash_conf_get(), which as reported by
Xueming, is not necessarily supported and triggers a misleading "Function
not implemented" warning. Updated commit log to reflect this.
---
 app/test-pmd/cmdline.c  |   2 +
 app/test-pmd/cmdline_flow.c | 101 
 app/test-pmd/config.c   | 140 +++
 3 files changed, 190 insertions(+), 53 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 512e3b55e..9704d0454 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -2033,6 +2033,8 @@ cmd_config_rss_parsed(void *parsed_result,
return;
}
rss_conf.rss_key = NULL;
+   /* Update global configuration for RSS types. */
+   rss_hf = rss_conf.rss_hf;
for (i = 0; i < rte_eth_dev_count(); i++) {
diag = rte_eth_dev_rss_hash_update(i, &rss_conf);
if (diag < 0)
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index a0e06db36..d37c5f39f 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -184,13 +184,19 @@ enum index {
 #define ITEM_RAW_SIZE \
(offsetof(struct rte_flow_item_raw, pattern) + ITEM_RAW_PATTERN_SIZE)
 
-/** Number of queue[] entries in struct rte_flow_action_rss. */
-#define ACTION_RSS_NUM 32
-
-/** Storage size for struct rte_flow_action_rss including queues. */
-#define ACTION_RSS_SIZE \
-   (offsetof(struct rte_flow_action_rss, queue) + \
-sizeof(*((struct rte_flow_action_rss *)0)->queue) * ACTION_RSS_NUM)
+/** Maximum number of queue indices in struct rte_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 32
+
+/** Storage for struct rte_flow_action_rss including external data. */
+union action_rss_data {
+   struct rte_flow_action_rss conf;
+   struct {
+   uint8_t conf_data[offsetof(struct rte_flow_action_rss, queue)];
+   uint16_t queue[ACTION_RSS_QUEUE_NUM];
+   struct rte_eth_rss_conf rss_conf;
+   uint8_t rss_key[RSS_HASH_KEY_LENGTH];
+   } s;
+};
 
 /** Maximum number of subsequent tokens and arguments on the stack. */
 #define CTX_STACK_SIZE 16
@@ -316,6 +322,13 @@ struct token {
.size = (sz), \
})
 
+/** Static initializer for ARGS() with arbitrary offset and size. */
+#define ARGS_ENTRY_ARB(o, s) \
+   (&(const struct arg){ \
+   .offset = (o), \
+   .size = (s), \
+   })
+
 /** Same as ARGS_ENTRY() using network byte ordering. */
 #define ARGS_ENTRY_HTON(s, f) \
(&(const struct arg){ \
@@ -650,6 +663,9 @@ static int parse_vc_spec(struct context *, const struct 
token *,
 const char *, unsigned int, void *, unsigned int);
 static int parse_vc_conf(struct context *, const struct token *,
 const char *, unsigned int, void *, unsigned int);
+static int parse_vc_action_rss(struct context *, const struct token *,
+  const char *, unsigned int, void *,
+  unsigned int);
 static int parse_vc_action_rss_queue(struct context *, const struct token *,
 const char *, unsigned int, void *,
 unsigned int);
@@ -1573,9 +1589,9 @@ static const struct token token_list[] = {
[ACTION_RSS] = {
.name = "rss",
.help = "spread packets among several queues",
-   .priv = PRIV_ACTION(RSS, ACTION_RSS_SIZE),
+   .priv = PRIV_ACTION(RSS, sizeof(union action_rss_data)),
.next = NEXT(action_rss),
-   .call = parse_vc,
+   .call = parse_vc_action_rss,
},
[ACTION_RSS_QUEUES] = {
.name = "queues",
@@ -2004,6 +2020,61 @@ parse_vc_conf(struct context *ctx, const struct token 
*token,
return len;
 }
 
+/** Parse RSS action. */
+static int
+parse_vc_action_rss(struct context *ctx, const struct token *token,
+   const char *str, unsigned int len,
+   void *buf, unsigned int size)
+{
+   struct buf

[dpdk-dev] [PATCH v6 09/11] app/testpmd: fix missing boolean values in flow command

2018-04-19 Thread Adrien Mazarguil
Original implementation lacks the on/off toggle.

This patch shows up as a fix because it has been a popular request ever
since the first DPDK release with the original implementation but was never
addressed.

Fixes: abc3d81aca1b ("app/testpmd: add item raw to flow command")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Matan Azrad 

--

v6 changes:

Fixed title (ethdev => app/testpmd) following Matan's comment [1].

[1] http://dpdk.org/ml/archives/dev/2018-April/097457.html
---
 app/test-pmd/cmdline_flow.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index fcd76b56e..f0b4b7bc4 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -2692,6 +2692,7 @@ static const char *const boolean_name[] = {
"false", "true",
"no", "yes",
"N", "Y",
+   "off", "on",
NULL,
 };
 
-- 
2.11.0


[dpdk-dev] [PATCH v6 11/11] ethdev: fix missing include in flow API

2018-04-19 Thread Adrien Mazarguil
Fixes: b1a4b4cbc0a8 ("ethdev: introduce generic flow API")
Cc: sta...@dpdk.org

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
---
 lib/librte_ether/rte_flow.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 56c733451..44ae19d3b 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -14,6 +14,8 @@
  * associated actions in hardware through flow rules.
  */
 
+#include 
+
 #include 
 #include 
 #include 
-- 
2.11.0


[dpdk-dev] [PATCH v6 10/11] ethdev: fix shallow copy of flow API RSS action

2018-04-19 Thread Adrien Mazarguil
The rss_conf field is defined as a pointer to struct rte_eth_rss_conf.

Even assuming it is permanently allocated and a pointer copy is safe,
pointed data may change and not reflect an applied flow rule anymore.

This patch aligns with testpmd by making a deep copy instead.

Fixes: 18da437b5f63 ("ethdev: add flow rule copy function")
Cc: sta...@dpdk.org
Cc: Gaetan Rivet 

Signed-off-by: Adrien Mazarguil 
Acked-by: Nelio Laranjeiro 
Cc: Thomas Monjalon 
---
 lib/librte_ether/rte_flow.c | 145 +++
 1 file changed, 102 insertions(+), 43 deletions(-)

diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index a3823d874..ada280810 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -255,60 +255,119 @@ rte_flow_error_set(struct rte_flow_error *error,
return -code;
 }
 
-/** Compute storage space needed by item specification. */
-static void
-flow_item_spec_size(const struct rte_flow_item *item,
-   size_t *size, size_t *pad)
+/** Pattern item specification types. */
+enum item_spec_type {
+   ITEM_SPEC,
+   ITEM_LAST,
+   ITEM_MASK,
+};
+
+/** Compute storage space needed by item specification and copy it. */
+static size_t
+flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
+   enum item_spec_type type)
 {
-   if (!item->spec) {
-   *size = 0;
+   size_t size = 0;
+   const void *item_spec =
+   type == ITEM_SPEC ? item->spec :
+   type == ITEM_LAST ? item->last :
+   type == ITEM_MASK ? item->mask :
+   NULL;
+
+   if (!item_spec)
goto empty;
-   }
switch (item->type) {
union {
const struct rte_flow_item_raw *raw;
-   } spec;
+   } src;
+   union {
+   struct rte_flow_item_raw *raw;
+   } dst;
 
-   /* Not a fall-through */
case RTE_FLOW_ITEM_TYPE_RAW:
-   spec.raw = item->spec;
-   *size = offsetof(struct rte_flow_item_raw, pattern) +
-   spec.raw->length * sizeof(*spec.raw->pattern);
+   src.raw = item_spec;
+   dst.raw = buf;
+   size = offsetof(struct rte_flow_item_raw, pattern) +
+   src.raw->length * sizeof(*src.raw->pattern);
+   if (dst.raw)
+   memcpy(dst.raw, src.raw, size);
break;
default:
-   *size = rte_flow_desc_item[item->type].size;
+   size = rte_flow_desc_item[item->type].size;
+   if (buf)
+   memcpy(buf, item_spec, size);
break;
}
 empty:
-   *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+   return RTE_ALIGN_CEIL(size, sizeof(double));
 }
 
-/** Compute storage space needed by action configuration. */
-static void
-flow_action_conf_size(const struct rte_flow_action *action,
- size_t *size, size_t *pad)
+/** Compute storage space needed by action configuration and copy it. */
+static size_t
+flow_action_conf_copy(void *buf, const struct rte_flow_action *action)
 {
-   if (!action->conf) {
-   *size = 0;
+   size_t size = 0;
+
+   if (!action->conf)
goto empty;
-   }
switch (action->type) {
union {
const struct rte_flow_action_rss *rss;
-   } conf;
+   } src;
+   union {
+   struct rte_flow_action_rss *rss;
+   } dst;
+   size_t off;
 
-   /* Not a fall-through. */
case RTE_FLOW_ACTION_TYPE_RSS:
-   conf.rss = action->conf;
-   *size = offsetof(struct rte_flow_action_rss, queue) +
-   conf.rss->num * sizeof(*conf.rss->queue);
+   src.rss = action->conf;
+   dst.rss = buf;
+   off = 0;
+   if (dst.rss)
+   *dst.rss = (struct rte_flow_action_rss){
+   .num = src.rss->num,
+   };
+   off += offsetof(struct rte_flow_action_rss, queue);
+   if (src.rss->num) {
+   size = sizeof(*src.rss->queue) * src.rss->num;
+   if (dst.rss)
+   memcpy(dst.rss->queue, src.rss->queue, size);
+   off += size;
+   }
+   off = RTE_ALIGN_CEIL(off, sizeof(double));
+   if (dst.rss) {
+   dst.rss->rss_conf = (void *)((uintptr_t)dst.rss + off);
+   *(struct rte_eth_rss_conf *)(uintptr_t)
+   dst.rss->rss_conf = (struct rte_eth_rss_conf){
+   .rss_key_len = src.rss->rss_conf->rss_key_len,
+  

[dpdk-dev] [PATCH v5 01/16] ethdev: add error types to flow API

2018-04-19 Thread Adrien Mazarguil
These enable more precise reporting of objects responsible for errors.

This breaks ABI compatibility for the following public functions:

- rte_flow_create()
- rte_flow_destroy()
- rte_flow_error_set()
- rte_flow_flush()
- rte_flow_isolate()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
---
 app/test-pmd/config.c   |  4 
 lib/librte_ether/rte_ethdev_version.map | 20 +---
 lib/librte_ether/rte_flow.h |  4 
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 5daa93bb3..a7645adb8 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1244,8 +1244,12 @@ port_flow_complain(struct rte_flow_error *error)
[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+   [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification",
+   [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range",
+   [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask",
[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+   [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration",
[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
};
const char *errstr;
diff --git a/lib/librte_ether/rte_ethdev_version.map 
b/lib/librte_ether/rte_ethdev_version.map
index 34df6c8b5..e915e7929 100644
--- a/lib/librte_ether/rte_ethdev_version.map
+++ b/lib/librte_ether/rte_ethdev_version.map
@@ -127,11 +127,6 @@ DPDK_17.02 {
 
_rte_eth_dev_reset;
rte_eth_dev_fw_version_get;
-   rte_flow_create;
-   rte_flow_destroy;
-   rte_flow_flush;
-   rte_flow_query;
-   rte_flow_validate;
 
 } DPDK_16.07;
 
@@ -153,7 +148,6 @@ DPDK_17.08 {
_rte_eth_dev_callback_process;
rte_eth_dev_adjust_nb_rx_tx_desc;
rte_flow_copy;
-   rte_flow_isolate;
rte_tm_capabilities_get;
rte_tm_hierarchy_commit;
rte_tm_level_capabilities_get;
@@ -192,7 +186,6 @@ DPDK_17.11 {
rte_eth_dev_get_sec_ctx;
rte_eth_dev_pool_ops_supported;
rte_eth_dev_reset;
-   rte_flow_error_set;
 
 } DPDK_17.08;
 
@@ -203,6 +196,19 @@ DPDK_18.02 {
 
 } DPDK_17.11;
 
+DPDK_18.05 {
+   global:
+
+   rte_flow_create;
+   rte_flow_destroy;
+   rte_flow_error_set;
+   rte_flow_flush;
+   rte_flow_isolate;
+   rte_flow_query;
+   rte_flow_validate;
+
+} DPDK_18.02;
+
 EXPERIMENTAL {
global:
 
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 44ae19d3b..26b95c772 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -1186,8 +1186,12 @@ enum rte_flow_error_type {
RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+   RTE_FLOW_ERROR_TYPE_ITEM_SPEC, /**< Item specification. */
+   RTE_FLOW_ERROR_TYPE_ITEM_LAST, /**< Item specification range. */
+   RTE_FLOW_ERROR_TYPE_ITEM_MASK, /**< Item specification mask. */
RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */
RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+   RTE_FLOW_ERROR_TYPE_ACTION_CONF, /**< Action configuration. */
RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
 };
 
-- 
2.11.0


[dpdk-dev] [PATCH v5 02/16] ethdev: clarify flow API pattern items and actions

2018-04-19 Thread Adrien Mazarguil
Although pattern items and actions examples end with "and so on", these
lists include all existing definitions and as a result are updated almost
every time new types are added. This is cumbersome and pointless.

This patch also synchronizes Doxygen and external API documentation wording
with a slight clarification regarding meta pattern items.

No fundamental API change.

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
---
 doc/guides/prog_guide/rte_flow.rst | 23 +++
 lib/librte_ether/rte_flow.h| 23 ++-
 2 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 961943dda..a11ebd617 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -186,12 +186,13 @@ Pattern item
 
 Pattern items fall in two categories:
 
-- Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
-  IPV6, ICMP, UDP, TCP, SCTP, VXLAN, MPLS, GRE, ESP and so on), usually
-  associated with a specification structure.
+- Matching protocol headers and packet data, usually associated with a
+  specification structure. These must be stacked in the same order as the
+  protocol layers to match inside packets, starting from the lowest.
 
-- Matching meta-data or affecting pattern processing (END, VOID, INVERT, PF,
-  VF, PORT and so on), often without a specification structure.
+- Matching meta-data or affecting pattern processing, often without a
+  specification structure. Since they do not match packet contents, their
+  position in the list is usually not relevant.
 
 Item specification structures are used to match specific values among
 protocol fields (or item properties). Documentation describes for each item
@@ -1001,15 +1002,13 @@ to a flow rule. That list is not ordered.
 
 They fall in three categories:
 
-- Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
-  processing matched packets by subsequent flow rules, unless overridden
-  with PASSTHRU.
+- Terminating actions that prevent processing matched packets by subsequent
+  flow rules, unless overridden with PASSTHRU.
 
-- Non-terminating actions (PASSTHRU, DUP) that leave matched packets up for
-  additional processing by subsequent flow rules.
+- Non-terminating actions that leave matched packets up for additional
+  processing by subsequent flow rules.
 
-- Other non-terminating meta actions that do not affect the fate of packets
-  (END, VOID, MARK, FLAG, COUNT, SECURITY).
+- Other non-terminating meta actions that do not affect the fate of packets.
 
 When several actions are combined in a flow rule, they should all have
 different types (e.g. dropping a packet twice is not possible).
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 26b95c772..d28a2a473 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -78,15 +78,13 @@ struct rte_flow_attr {
  *
  * Pattern items fall in two categories:
  *
- * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
- *   IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a
+ * - Matching protocol headers and packet data, usually associated with a
  *   specification structure. These must be stacked in the same order as the
- *   protocol layers to match, starting from the lowest.
+ *   protocol layers to match inside packets, starting from the lowest.
  *
- * - Matching meta-data or affecting pattern processing (END, VOID, INVERT,
- *   PF, VF, PORT and so on), often without a specification structure. Since
- *   they do not match packet contents, these can be specified anywhere
- *   within item lists without affecting others.
+ * - Matching meta-data or affecting pattern processing, often without a
+ *   specification structure. Since they do not match packet contents, their
+ *   position in the list is usually not relevant.
  *
  * See the description of individual types for more information. Those
  * marked with [META] fall into the second category.
@@ -865,15 +863,14 @@ struct rte_flow_item {
  *
  * They fall in three categories:
  *
- * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
- *   processing matched packets by subsequent flow rules, unless overridden
- *   with PASSTHRU.
+ * - Terminating actions that prevent processing matched packets by
+ *   subsequent flow rules, unless overridden with PASSTHRU.
  *
- * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
- *   for additional processing by subsequent flow rules.
+ * - Non terminating actions that leave matched packets up for additional
+ *   processing by subsequent flow rules.
  *
  * - Other non terminating meta actions that do not affect the fate of
- *   packets (END, VOID, MARK, FLAG, COUNT).
+ *   packets.
  *
  * When several actions are combined in a flow rule, they should all have
  * different types (e.g. dropping a pac

[dpdk-dev] [PATCH v5 00/16] Flow API overhaul for switch offloads

2018-04-19 Thread Adrien Mazarguil
As summarized in a prior RFC [1], the flow API (rte_flow) was chosen as a
means to manage switch offloads supported by many devices (usually going by
names such as E-Switch or vSwitch) through user-specified flow rules.

Combined with the need to support encap/decap actions, this requires a
change in the way flow actions are processed (in order and possibly
repeated) which modifies the behavior of some of the existing actions, thus
warranting a major ABI breakage.

Given this ABI breakage is also required by other work submitted for the
current release [2][3], this series addresses various longstanding issues
with the flow API and makes minor improvements in preparation for upcoming
features.

Changes summary:

- Additional error types.
- Clearer documentation.
- Improved C++ compatibility.
- Exhaustive RSS action.
- Consistent behavior of VLAN pattern item.
- New "transfer" attribute bringing consistency to VF/PF pattern items.
- Confusing "PORT" pattern item renamed "PHY_PORT", with new action
  counterpart.
- New "PORT_ID" pattern item and action to be used with port representors.

This series piggybacks on the major ABI update introduced by a prior
commit [4] for DPDK 18.05 and depends on several fixes [5] which must be
applied first.

[1] "[RFC] Switch device offload with DPDK"
http://dpdk.org/ml/archives/dev/2018-March/092513.html

[2] commit 676b605182a5 ("doc: announce ethdev API change for RSS
configuration")

[3] "[PATCH v1 00/21] MLX5 tunnel Rx offloading"
http://dpdk.org/ml/archives/dev/2018-March/092264.html

[4] commit 653e038efc9b ("ethdev: remove versioning of filter control
function")

[5] "[PATCH v6 00/11] Bunch of flow API-related fixes"
http://dpdk.org/ml/archives/dev/2018-April/098035.html

v5 changes:

- Fixed errors reported by GCC and Clang in patch 05/16 ("ethdev: alter
  behavior of flow API actions").
- Rebased series once again.

v4 changes:

- No change besides new acked-by lines, rebased series to address conflicts.

v3 changes:

- Rebased series, fixed latest conflicts.
- Addressed Andrew's comments, see affected patches for details:
  - Empty RSS types in flow rule means PMD-specific RSS instead of no RSS.
  - RSS hash function now explicitly compared against
RTE_ETH_HASH_FUNCTION_DEFAULT instead of 0 in all PMDs.
  - sfc PMD updated to also accept Toeplitz.
  - Implicit VLAN TPID matching now removed from all PMDs.
  - Default mask upate for VLAN TCI now split as separate patch #11.
  - Ingress/egress definition clarified in patch #12.

v2 changes:

- Squashed "ethdev: update ABI for flow API functions" in subsequent
  patches.
- Emphasized ABI impact in relevant commit logs.
- Modified documentation in "ethdev: alter behavior of flow API actions" to
  describe how terminating flow rules without any action of the fate kind
  result in undefined behavior instead of dropping traffic.
- Fixed other minor documentation formatting issues.
- Modified "ethdev: refine TPID handling in flow API" as follows:
  - Using standard macro definitions for VLAN, QinQ and E-Tag EtherTypes.
  - Fixed endian conversion in sfc.
  - Replaced a condition in VLAN pattern item processing with an assertion
check for i40e.

Adrien Mazarguil (16):
  ethdev: add error types to flow API
  ethdev: clarify flow API pattern items and actions
  doc: remove flow API migration section
  ethdev: remove DUP action from flow API
  ethdev: alter behavior of flow API actions
  ethdev: remove C99 flexible arrays from flow API
  ethdev: flatten RSS configuration in flow API
  ethdev: add hash function to RSS flow API action
  ethdev: add encap level to RSS flow API action
  ethdev: refine TPID handling in flow API
  ethdev: limit default VLAN TCI mask in flow API
  ethdev: add transfer attribute to flow API
  ethdev: update behavior of VF/PF in flow API
  ethdev: rename physical port item in flow API
  ethdev: add physical port action to flow API
  ethdev: add port ID item and action to flow API

 app/test-pmd/cmdline_flow.c | 394 +++
 app/test-pmd/config.c   |  78 +--
 doc/guides/nics/tap.rst |   2 +-
 doc/guides/prog_guide/rte_flow.rst  | 618 ---
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  60 ++-
 drivers/net/bnxt/bnxt_filter.c  |  49 +-
 drivers/net/e1000/e1000_ethdev.h|  13 +-
 drivers/net/e1000/igb_ethdev.c  |   4 +-
 drivers/net/e1000/igb_flow.c|  83 ++-
 drivers/net/e1000/igb_rxtx.c|  55 +-
 drivers/net/enic/enic_flow.c|  50 +-
 drivers/net/i40e/i40e_ethdev.c  |  57 ++-
 drivers/net/i40e/i40e_ethdev.h  |  15 +-
 drivers/net/i40e/i40e_flow.c| 130 +++--
 drivers/net/ixgbe/ixgbe_ethdev.c|   7 +-
 drivers/net/ixgbe/ixgbe_ethdev.h|  13 +-
 drivers/net/ixgbe/ixgbe_flow.c  |  91 +++-
 drivers/net/ixgbe/ixgbe_rxtx.c  |  55 

[dpdk-dev] [PATCH v5 03/16] doc: remove flow API migration section

2018-04-19 Thread Adrien Mazarguil
This section has become less relevant since the flow API (rte_flow) is now
a mature DPDK API with applications developed directly on top of it instead
of an afterthought.

This patch removes it for the following reasons:

- It has never been updated to track the latest changes in the legacy
  filter types and never will.

- Many provided examples are theoretical and misleading since PMDs do not
  implement them. Others are obvious.

- Upcoming work on the flow API will alter the behavior of several pattern
  items, actions and in some cases, flow rules, which will in turn cause
  existing examples to be wrong.

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
---
 doc/guides/prog_guide/rte_flow.rst | 298 
 1 file changed, 298 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index a11ebd617..51826d04c 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -55,9 +55,6 @@ encompasses and supersedes (including all functions and 
filter types) in
 order to expose a single interface with an unambiguous behavior that is
 common to all poll-mode drivers (PMDs).
 
-Several methods to migrate existing applications are described in `API
-migration`_.
-
 Flow rule
 -
 
@@ -2068,298 +2065,3 @@ Future evolutions
 
 - Optional software fallback when PMDs are unable to handle requested flow
   rules so applications do not have to implement their own.
-
-API migration
--
-
-Exhaustive list of deprecated filter types (normally prefixed with
-*RTE_ETH_FILTER_*) found in ``rte_eth_ctrl.h`` and methods to convert them
-to *rte_flow* rules.
-
-``MACVLAN`` to ``ETH`` → ``VF``, ``PF``
-~~~
-
-*MACVLAN* can be translated to a basic `Item: ETH`_ flow rule with a
-terminating `Action: VF`_ or `Action: PF`_.
-
-.. _table_rte_flow_migration_macvlan:
-
-.. table:: MACVLAN conversion
-
-   +--+-+
-   | Pattern  | Actions |
-   +===+=+==+=+=+
-   | 0 | ETH | ``spec`` | any | VF, |
-   |   | +--+-+ PF  |
-   |   | | ``last`` | N/A | |
-   |   | +--+-+ |
-   |   | | ``mask`` | any | |
-   +---+-+--+-+-+
-   | 1 | END  | END |
-   +---+--+-+
-
-``ETHERTYPE`` to ``ETH`` → ``QUEUE``, ``DROP``
-~~
-
-*ETHERTYPE* is basically an `Item: ETH`_ flow rule with a terminating
-`Action: QUEUE`_ or `Action: DROP`_.
-
-.. _table_rte_flow_migration_ethertype:
-
-.. table:: ETHERTYPE conversion
-
-   +--+-+
-   | Pattern  | Actions |
-   +===+=+==+=+=+
-   | 0 | ETH | ``spec`` | any | QUEUE,  |
-   |   | +--+-+ DROP|
-   |   | | ``last`` | N/A | |
-   |   | +--+-+ |
-   |   | | ``mask`` | any | |
-   +---+-+--+-+-+
-   | 1 | END  | END |
-   +---+--+-+
-
-``FLEXIBLE`` to ``RAW`` → ``QUEUE``
-~~~
-
-*FLEXIBLE* can be translated to one `Item: RAW`_ pattern with a terminating
-`Action: QUEUE`_ and a defined priority level.
-
-.. _table_rte_flow_migration_flexible:
-
-.. table:: FLEXIBLE conversion
-
-   +--+-+
-   | Pattern  | Actions |
-   +===+=+==+=+=+
-   | 0 | RAW | ``spec`` | any | QUEUE   |
-   |   | +--+-+ |
-   |   | | ``last`` | N/A | |
-   |   | +--+-+ |
-   |   | | ``mask`` | any | |
-   +---+-+--+-+-+
-   | 1 | END  | END |
-   +---+--+-+
-
-``SYN`` to ``TCP`` → ``QUEUE``
-~~
-
-*SYN* is a `Item: TCP`_ rule with only the ``syn`` bit enabled and masked,
-and a terminating `Action: QUEUE`_.
-
-Priority level can be set to simulate the high priority bit.
-
-.. _table_rte_flow_migration_syn:
-
-.. table:: SYN conversion
-
-   +---+-+
-   | Pattern   | Actions |
-   +===+==+==+=+=+
-   | 0 | ETH  | ``spec`` | unset   | QUEUE   |
-   |   |  +--+-+ |
-   |   |  | ``last`` | unset   | |
-   |   |  +--+-+ |
-   |   |  | ``mask`` | unset   | |
-   +---+--+--+-+-+
-   | 1 | IPV4 | ``spec`` | unset   | END |
-   |   |  +--+-+ |
-   |   |  | ``mask`` | unset   | |
-   |   |  +--+-+ |
-   |   |  | ``mask`` | unset   |  

[dpdk-dev] [PATCH v5 05/16] ethdev: alter behavior of flow API actions

2018-04-19 Thread Adrien Mazarguil
This patch makes the following changes to flow rule actions:

- List order now matters, they are redefined as performed first to last
  instead of "all simultaneously".

- Repeated actions are now supported (e.g. specifying QUEUE multiple times
  now duplicates traffic among them). Previously only the last action of
  any given kind was taken into account.

- No more distinction between terminating/non-terminating/meta actions.
  Flow rules themselves are now defined as always terminating unless a
  PASSTHRU action is specified.

These changes alter the behavior of flow rules in corner cases in order to
prepare the flow API for actions that modify traffic contents or properties
(e.g. encapsulation, compression) and for which order matters when combined.

Previously one would have to do so through multiple flow rules by combining
PASSTHRU with priority levels, however this proved overly complex to
implement at the PMD level, hence this simpler approach.

This breaks ABI compatibility for the following public functions:

- rte_flow_create()
- rte_flow_validate()

PMDs with rte_flow support are modified accordingly:

- bnxt: no change, implementation already forbids multiple actions and does
  not support PASSTHRU.

- e1000: no change, same as bnxt.

- enic: modified to forbid redundant actions, no support for default drop.

- failsafe: no change needed.

- i40e: no change, implementation already forbids multiple actions.

- ixgbe: same as i40e.

- mlx4: modified to forbid multiple fate-deciding actions and drop when
  unspecified.

- mlx5: same as mlx4, with other redundant actions also forbidden.

- sfc: same as mlx4.

- tap: implementation already complies with the new behavior except for
  the default pass-through modified as a default drop.

Signed-off-by: Adrien Mazarguil 
Reviewed-by: Andrew Rybchenko 
Cc: Ajit Khaparde 
Cc: Wenzhuo Lu 
Cc: John Daley 
Cc: Gaetan Rivet 
Cc: Beilei Xing 
Cc: Konstantin Ananyev 
Cc: Nelio Laranjeiro 
Cc: Andrew Rybchenko 
Cc: Pascal Mazon 

--

v5 changes:

Fixed issues raised by GCC and Clang with overlap checks in both enic and
mlx5, as reported by Andrew [1].

[1] http://dpdk.org/ml/archives/dev/2018-April/097864.html
---
 doc/guides/prog_guide/rte_flow.rst | 67 +---
 drivers/net/enic/enic_flow.c   | 25 
 drivers/net/mlx4/mlx4_flow.c   | 21 +++---
 drivers/net/mlx5/mlx5_flow.c   | 69 ++---
 drivers/net/sfc/sfc_flow.c | 22 +++
 drivers/net/tap/tap_flow.c | 11 ++
 lib/librte_ether/rte_flow.h| 54 +++---
 7 files changed, 138 insertions(+), 131 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index a237e4fd2..80360d068 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -995,28 +995,27 @@ Actions
 
 Each possible action is represented by a type. Some have associated
 configuration structures. Several actions combined in a list can be assigned
-to a flow rule. That list is not ordered.
+to a flow rule and are performed in order.
 
 They fall in three categories:
 
-- Terminating actions that prevent processing matched packets by subsequent
-  flow rules, unless overridden with PASSTHRU.
+- Actions that modify the fate of matching traffic, for instance by dropping
+  or assigning it a specific destination.
 
-- Non-terminating actions that leave matched packets up for additional
-  processing by subsequent flow rules.
+- Actions that modify matching traffic contents or its properties. This
+  includes adding/removing encapsulation, encryption, compression and marks.
 
-- Other non-terminating meta actions that do not affect the fate of packets.
+- Actions related to the flow rule itself, such as updating counters or
+  making it non-terminating.
 
-When several actions are combined in a flow rule, they should all have
-different types (e.g. dropping a packet twice is not possible).
+Flow rules being terminating by default, not specifying any action of the
+fate kind results in undefined behavior. This applies to both ingress and
+egress.
 
-Only the last action of a given type is taken into account. PMDs still
-perform error checking on the entire list.
+PASSTHRU, when supported, makes a flow rule non-terminating.
 
 Like matching patterns, action lists are terminated by END items.
 
-*Note that PASSTHRU is the only action able to override a terminating rule.*
-
 Example of action that redirects packets to queue index 10:
 
 .. _table_rte_flow_action_example:
@@ -1029,12 +1028,11 @@ Example of action that redirects packets to queue index 
10:
| ``index`` | 10|
+---+---+
 
-Action lists examples, their order is not significant, applications must
-consider all actions to be performed simultaneously:
+Actions are performed in list order:
 
-.. _table_rte_flow_count_and_drop:
+.. _table_rte_flow_count_then_drop:
 
-.. table:: Co

[dpdk-dev] [PATCH v5 04/16] ethdev: remove DUP action from flow API

2018-04-19 Thread Adrien Mazarguil
Upcoming changes in relation to the handling of actions list will make the
DUP action redundant as specifying several QUEUE actions will achieve the
same behavior. Besides, no PMD implements this action.

By removing an entry from enum rte_flow_action_type, this patch breaks ABI
compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
---
 app/test-pmd/cmdline_flow.c | 23 ---
 app/test-pmd/config.c   |  1 -
 doc/guides/prog_guide/rte_flow.rst  | 23 ---
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  8 
 lib/librte_ether/rte_ethdev_version.map |  2 +-
 lib/librte_ether/rte_flow.c |  1 -
 lib/librte_ether/rte_flow.h | 24 
 7 files changed, 1 insertion(+), 81 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index f0b4b7bc4..2ddb08feb 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -164,8 +164,6 @@ enum index {
ACTION_QUEUE_INDEX,
ACTION_DROP,
ACTION_COUNT,
-   ACTION_DUP,
-   ACTION_DUP_INDEX,
ACTION_RSS,
ACTION_RSS_TYPES,
ACTION_RSS_TYPE,
@@ -625,7 +623,6 @@ static const enum index next_action[] = {
ACTION_QUEUE,
ACTION_DROP,
ACTION_COUNT,
-   ACTION_DUP,
ACTION_RSS,
ACTION_PF,
ACTION_VF,
@@ -645,12 +642,6 @@ static const enum index action_queue[] = {
ZERO,
 };
 
-static const enum index action_dup[] = {
-   ACTION_DUP_INDEX,
-   ACTION_NEXT,
-   ZERO,
-};
-
 static const enum index action_rss[] = {
ACTION_RSS_TYPES,
ACTION_RSS_KEY,
@@ -1597,20 +1588,6 @@ static const struct token token_list[] = {
.next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
.call = parse_vc,
},
-   [ACTION_DUP] = {
-   .name = "dup",
-   .help = "duplicate packets to a given queue index",
-   .priv = PRIV_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
-   .next = NEXT(action_dup),
-   .call = parse_vc,
-   },
-   [ACTION_DUP_INDEX] = {
-   .name = "index",
-   .help = "queue index to duplicate packets to",
-   .next = NEXT(action_dup, NEXT_ENTRY(UNSIGNED)),
-   .args = ARGS(ARGS_ENTRY(struct rte_flow_action_dup, index)),
-   .call = parse_vc_conf,
-   },
[ACTION_RSS] = {
.name = "rss",
.help = "spread packets among several queues",
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index a7645adb8..d0d372797 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1065,7 +1065,6 @@ static const struct {
MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
MK_FLOW_ACTION(DROP, 0),
MK_FLOW_ACTION(COUNT, 0),
-   MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
MK_FLOW_ACTION(PF, 0),
MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 51826d04c..a237e4fd2 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1299,26 +1299,6 @@ Query structure to retrieve and reset flow rule counters:
| ``bytes`` | out | number of bytes through this rule |
+---+-+---+
 
-Action: ``DUP``
-^^^
-
-Duplicates packets to a given queue index.
-
-This is normally combined with QUEUE, however when used alone, it is
-actually similar to QUEUE + PASSTHRU.
-
-- Non-terminating by default.
-
-.. _table_rte_flow_action_dup:
-
-.. table:: DUP
-
-   +---++
-   | Field | Value  |
-   +===++
-   | ``index`` | queue index to duplicate packet to |
-   +---++
-
 Action: ``RSS``
 ^^^
 
@@ -2010,9 +1990,6 @@ Unsupported actions
   and tagging (`Action: MARK`_ or `Action: FLAG`_) may be implemented in
   software as long as the target queue is used by a single rule.
 
-- A rule specifying both `Action: DUP`_ + `Action: QUEUE`_ may be translated
-  to two hidden rules combining `Action: QUEUE`_ and `Action: PASSTHRU`_.
-
 - When a single target queue is provided, `Action: RSS`_ can also be
   implemented through `Action: QUEUE`_.
 
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index cb6f201e1..a015d02a4 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs

[dpdk-dev] [PATCH v5 06/16] ethdev: remove C99 flexible arrays from flow API

2018-04-19 Thread Adrien Mazarguil
This patch replaces C99-style flexible arrays in struct rte_flow_action_rss
and struct rte_flow_item_raw with standard pointers to the same data.

They proved difficult to use in the field (e.g. no possibility of static
initialization) and unsuitable for C++ applications.

Affected PMDs and examples are updated accordingly.

This breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Acked-by: Thomas Monjalon 
Acked-by: Nelio Laranjeiro 
---
 app/test-pmd/cmdline_flow.c| 117 +---
 app/test-pmd/config.c  |  25 ---
 doc/guides/prog_guide/rte_flow.rst |  18 ++---
 drivers/net/mlx4/mlx4_flow.c   |  22 +++---
 drivers/net/mlx5/mlx5_flow.c   |  20 +++---
 examples/ipsec-secgw/ipsec.c   |  17 ++---
 lib/librte_ether/rte_flow.c|  25 ---
 lib/librte_ether/rte_flow.h|   8 ++-
 8 files changed, 135 insertions(+), 117 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 2ddb08feb..798b7948d 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -179,25 +179,22 @@ enum index {
ACTION_METER_ID,
 };
 
-/** Size of pattern[] field in struct rte_flow_item_raw. */
-#define ITEM_RAW_PATTERN_SIZE 36
+/** Maximum size for pattern in struct rte_flow_item_raw. */
+#define ITEM_RAW_PATTERN_SIZE 40
 
 /** Storage size for struct rte_flow_item_raw including pattern. */
 #define ITEM_RAW_SIZE \
-   (offsetof(struct rte_flow_item_raw, pattern) + ITEM_RAW_PATTERN_SIZE)
+   (sizeof(struct rte_flow_item_raw) + ITEM_RAW_PATTERN_SIZE)
 
 /** Maximum number of queue indices in struct rte_flow_action_rss. */
 #define ACTION_RSS_QUEUE_NUM 32
 
 /** Storage for struct rte_flow_action_rss including external data. */
-union action_rss_data {
+struct action_rss_data {
struct rte_flow_action_rss conf;
-   struct {
-   uint8_t conf_data[offsetof(struct rte_flow_action_rss, queue)];
-   uint16_t queue[ACTION_RSS_QUEUE_NUM];
-   struct rte_eth_rss_conf rss_conf;
-   uint8_t rss_key[RSS_HASH_KEY_LENGTH];
-   } s;
+   uint16_t queue[ACTION_RSS_QUEUE_NUM];
+   struct rte_eth_rss_conf rss_conf;
+   uint8_t rss_key[RSS_HASH_KEY_LENGTH];
 };
 
 /** Maximum number of subsequent tokens and arguments on the stack. */
@@ -320,13 +317,6 @@ struct token {
.size = sizeof(*((s *)0)->f), \
})
 
-/** Static initializer for ARGS() with arbitrary size. */
-#define ARGS_ENTRY_USZ(s, f, sz) \
-   (&(const struct arg){ \
-   .offset = offsetof(s, f), \
-   .size = (sz), \
-   })
-
 /** Static initializer for ARGS() with arbitrary offset and size. */
 #define ARGS_ENTRY_ARB(o, s) \
(&(const struct arg){ \
@@ -1105,9 +1095,9 @@ static const struct token token_list[] = {
 NEXT_ENTRY(ITEM_PARAM_IS,
ITEM_PARAM_SPEC,
ITEM_PARAM_MASK)),
-   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_raw, length),
-ARGS_ENTRY_USZ(struct rte_flow_item_raw,
-   pattern,
+   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_raw, pattern),
+ARGS_ENTRY(struct rte_flow_item_raw, length),
+ARGS_ENTRY_ARB(sizeof(struct rte_flow_item_raw),
ITEM_RAW_PATTERN_SIZE)),
},
[ITEM_ETH] = {
@@ -1591,7 +1581,7 @@ static const struct token token_list[] = {
[ACTION_RSS] = {
.name = "rss",
.help = "spread packets among several queues",
-   .priv = PRIV_ACTION(RSS, sizeof(union action_rss_data)),
+   .priv = PRIV_ACTION(RSS, sizeof(struct action_rss_data)),
.next = NEXT(action_rss),
.call = parse_vc_action_rss,
},
@@ -1610,23 +1600,21 @@ static const struct token token_list[] = {
.name = "key",
.help = "RSS hash key",
.next = NEXT(action_rss, NEXT_ENTRY(STRING)),
-   .args = ARGS(ARGS_ENTRY_ARB
-(((uintptr_t)&((union action_rss_data *)0)->
-  s.rss_conf.rss_key_len),
+   .args = ARGS(ARGS_ENTRY_ARB(0, 0),
+ARGS_ENTRY_ARB
+(offsetof(struct action_rss_data, rss_conf) +
+ offsetof(struct rte_eth_rss_conf, rss_key_len),
  sizeof(((struct rte_eth_rss_conf *)0)->
 rss_key_len)),
-ARGS_ENTRY_ARB
-(((uintptr_t)((union action_rss_data *)0)->
-  s.rss_

[dpdk-dev] [PATCH v5 07/16] ethdev: flatten RSS configuration in flow API

2018-04-19 Thread Adrien Mazarguil
Since its inception, the rte_flow RSS action has been relying in part on
external struct rte_eth_rss_conf for compatibility with the legacy RSS API.
This structure lacks parameters such as the hash algorithm to use, and more
recently, a method to tell which layer RSS should be performed on [1].

Given struct rte_eth_rss_conf will never be flexible enough to represent a
complete RSS configuration (e.g. RETA table), this patch supersedes it by
extending the rte_flow RSS action directly.

A subsequent patch will add a field to use a non-default RSS hash
algorithm. To that end, a field named "types" replaces the field formerly
known as "rss_hf" and standing for "RSS hash functions" as it was
confusing. Actual RSS hash function types are defined by enum
rte_eth_hash_function.

This patch updates all PMDs and example applications accordingly.

It breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

[1] commit 676b605182a5 ("doc: announce ethdev API change for RSS
configuration")

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
Cc: Xueming Li 
Cc: Ferruh Yigit 
Cc: Thomas Monjalon 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
Cc: Beilei Xing 
Cc: Qi Zhang 
Cc: Konstantin Ananyev 
Cc: Nelio Laranjeiro 
Cc: Yongseok Koh 
Cc: Andrew Rybchenko 
Cc: Pascal Mazon 
Cc: Radu Nicolau 
Cc: Akhil Goyal 

---

v3 changes:

Documentation update regarding the meaning of a 0 value for RSS types in
flow rules.

It used to implicitly mean "no RSS" but is redefined as requesting a kind
of "best-effort" mode from PMDs, i.e. anything ranging from empty to
all-inclusive RSS; what matters is it provides safe defaults that will work
regardless of PMD capabilities.
---
 app/test-pmd/cmdline_flow.c |  48 +++---
 app/test-pmd/config.c   |  39 ++---
 doc/guides/prog_guide/rte_flow.rst  |  28 ++--
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   6 +-
 drivers/net/e1000/e1000_ethdev.h|  13 +-
 drivers/net/e1000/igb_ethdev.c  |   4 +-
 drivers/net/e1000/igb_flow.c|  31 ++--
 drivers/net/e1000/igb_rxtx.c|  51 +-
 drivers/net/i40e/i40e_ethdev.c  |  53 +--
 drivers/net/i40e/i40e_ethdev.h  |  15 +-
 drivers/net/i40e/i40e_flow.c|  47 +++---
 drivers/net/ixgbe/ixgbe_ethdev.c|   4 +-
 drivers/net/ixgbe/ixgbe_ethdev.h|  13 +-
 drivers/net/ixgbe/ixgbe_flow.c  |  30 ++--
 drivers/net/ixgbe/ixgbe_rxtx.c  |  51 +-
 drivers/net/mlx4/mlx4.c |   2 +-
 drivers/net/mlx4/mlx4_flow.c|  61 +++
 drivers/net/mlx4/mlx4_flow.h|   2 +-
 drivers/net/mlx4/mlx4_rxq.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h|   2 +-
 drivers/net/mlx5/mlx5_flow.c| 193 +++
 drivers/net/mlx5/mlx5_rxq.c |  22 +--
 drivers/net/mlx5/mlx5_rxtx.h|  26 +--
 drivers/net/sfc/sfc_flow.c  |  21 ++-
 drivers/net/tap/tap_flow.c  |   8 +-
 examples/ipsec-secgw/ipsec.c|  10 +-
 lib/librte_ether/rte_flow.c |  39 ++---
 lib/librte_ether/rte_flow.h |  12 +-
 28 files changed, 478 insertions(+), 355 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 798b7948d..c9c2c3ad9 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -192,9 +192,8 @@ enum index {
 /** Storage for struct rte_flow_action_rss including external data. */
 struct action_rss_data {
struct rte_flow_action_rss conf;
+   uint8_t key[RSS_HASH_KEY_LENGTH];
uint16_t queue[ACTION_RSS_QUEUE_NUM];
-   struct rte_eth_rss_conf rss_conf;
-   uint8_t rss_key[RSS_HASH_KEY_LENGTH];
 };
 
 /** Maximum number of subsequent tokens and arguments on the stack. */
@@ -1587,7 +1586,7 @@ static const struct token token_list[] = {
},
[ACTION_RSS_TYPES] = {
.name = "types",
-   .help = "RSS hash types",
+   .help = "specific RSS hash types",
.next = NEXT(action_rss, NEXT_ENTRY(ACTION_RSS_TYPE)),
},
[ACTION_RSS_TYPE] = {
@@ -1602,21 +1601,21 @@ static const struct token token_list[] = {
.next = NEXT(action_rss, NEXT_ENTRY(STRING)),
.args = ARGS(ARGS_ENTRY_ARB(0, 0),
 ARGS_ENTRY_ARB
-(offsetof(struct action_rss_data, rss_conf) +
- offsetof(struct rte_eth_rss_conf, rss_key_len),
- sizeof(((struct rte_eth_rss_conf *)0)->
-rss_key_len)),
-ARGS_ENTRY(struct action_rss_data, rss_key)),
+(offsetof(struct action_rss_data, conf) +
+  

[dpdk-dev] [PATCH v5 08/16] ethdev: add hash function to RSS flow API action

2018-04-19 Thread Adrien Mazarguil
By definition, RSS involves some kind of hash algorithm, usually Toeplitz.

Until now it could not be modified on a flow rule basis and PMDs had to
always assume RTE_ETH_HASH_FUNCTION_DEFAULT, which remains the default
behavior when unspecified (0).

This breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
Cc: Ferruh Yigit 
Cc: Thomas Monjalon 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
Cc: Beilei Xing 
Cc: Qi Zhang 
Cc: Konstantin Ananyev 
Cc: Nelio Laranjeiro 
Cc: Yongseok Koh 
Cc: Andrew Rybchenko 
Cc: Pascal Mazon 

---

v3 changes:

- Although RTE_ETH_HASH_FUNCTION_DEFAULT is defined as 0, made comparisons
  more explicit where doing so would clarify the code.

- Updated sfc to include Toeplitz as the other allowed value.

Both according to Andrew's suggestions [1].

[1] http://dpdk.org/ml/archives/dev/2018-April/095840.html
---
 app/test-pmd/cmdline_flow.c | 72 
 app/test-pmd/config.c   |  1 +
 doc/guides/prog_guide/rte_flow.rst  |  2 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  3 +
 drivers/net/e1000/igb_flow.c|  4 ++
 drivers/net/e1000/igb_rxtx.c|  4 +-
 drivers/net/i40e/i40e_ethdev.c  |  4 +-
 drivers/net/i40e/i40e_flow.c|  4 ++
 drivers/net/ixgbe/ixgbe_flow.c  |  4 ++
 drivers/net/ixgbe/ixgbe_rxtx.c  |  4 +-
 drivers/net/mlx4/mlx4_flow.c|  7 +++
 drivers/net/mlx5/mlx5_flow.c| 13 +
 drivers/net/sfc/sfc_flow.c  |  8 +++
 drivers/net/tap/tap_flow.c  |  6 ++
 lib/librte_ether/rte_flow.c |  1 +
 lib/librte_ether/rte_flow.h |  2 +
 16 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index c9c2c3ad9..7436e0356 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -14,6 +14,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -165,6 +166,10 @@ enum index {
ACTION_DROP,
ACTION_COUNT,
ACTION_RSS,
+   ACTION_RSS_FUNC,
+   ACTION_RSS_FUNC_DEFAULT,
+   ACTION_RSS_FUNC_TOEPLITZ,
+   ACTION_RSS_FUNC_SIMPLE_XOR,
ACTION_RSS_TYPES,
ACTION_RSS_TYPE,
ACTION_RSS_KEY,
@@ -632,6 +637,7 @@ static const enum index action_queue[] = {
 };
 
 static const enum index action_rss[] = {
+   ACTION_RSS_FUNC,
ACTION_RSS_TYPES,
ACTION_RSS_KEY,
ACTION_RSS_KEY_LEN,
@@ -666,6 +672,9 @@ static int parse_vc_conf(struct context *, const struct 
token *,
 static int parse_vc_action_rss(struct context *, const struct token *,
   const char *, unsigned int, void *,
   unsigned int);
+static int parse_vc_action_rss_func(struct context *, const struct token *,
+   const char *, unsigned int, void *,
+   unsigned int);
 static int parse_vc_action_rss_type(struct context *, const struct token *,
const char *, unsigned int, void *,
unsigned int);
@@ -1584,6 +1593,29 @@ static const struct token token_list[] = {
.next = NEXT(action_rss),
.call = parse_vc_action_rss,
},
+   [ACTION_RSS_FUNC] = {
+   .name = "func",
+   .help = "RSS hash function to apply",
+   .next = NEXT(action_rss,
+NEXT_ENTRY(ACTION_RSS_FUNC_DEFAULT,
+   ACTION_RSS_FUNC_TOEPLITZ,
+   ACTION_RSS_FUNC_SIMPLE_XOR)),
+   },
+   [ACTION_RSS_FUNC_DEFAULT] = {
+   .name = "default",
+   .help = "default hash function",
+   .call = parse_vc_action_rss_func,
+   },
+   [ACTION_RSS_FUNC_TOEPLITZ] = {
+   .name = "toeplitz",
+   .help = "Toeplitz hash function",
+   .call = parse_vc_action_rss_func,
+   },
+   [ACTION_RSS_FUNC_SIMPLE_XOR] = {
+   .name = "simple_xor",
+   .help = "simple XOR hash function",
+   .call = parse_vc_action_rss_func,
+   },
[ACTION_RSS_TYPES] = {
.name = "types",
.help = "specific RSS hash types",
@@ -2074,6 +2106,7 @@ parse_vc_action_rss(struct context *ctx, const struct 
token *token,
action_rss_data = ctx->object;
*action_rss_data = (struct action_rss_data){
.conf = (struct rte_flow_action_rss){
+   .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
.types = rss_hf,
.key_len = sizeof(action_rss_data->key),
.queu

[dpdk-dev] [PATCH v5 11/16] ethdev: limit default VLAN TCI mask in flow API

2018-04-19 Thread Adrien Mazarguil
VLAN TCI is a 16-bit field broken down as PCP (3b), DEI (1b) and VID (12b).

The default mask used by PMDs for the VLAN pattern when one isn't provided
by the application comprises the entire TCI, which is problematic because
most devices only support VID matching.

This forces applications to always provide a mask limited to the VID part
in order to successfully apply a flow rule with a VLAN pattern item.
Moreover, applications rarely want to match PCP and DEI intentionally.

Given the above and since VID is what is commonly referred to when talking
about VLAN, this commit excludes PCP and DEI from the default mask.

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
Cc: Ferruh Yigit 
Cc: Thomas Monjalon 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
Cc: Ajit Khaparde 
Cc: Somnath Kotur 
Cc: John Daley 
Cc: Hyong Youb Kim 
Cc: Beilei Xing 
Cc: Qi Zhang 
Cc: Konstantin Ananyev 
Cc: Nelio Laranjeiro 
Cc: Yongseok Koh 
Cc: Tomasz Duszynski 
Cc: Dmitri Epshtein 
Cc: Natalie Samsonov 
Cc: Jianbo Liu 
Cc: Andrew Rybchenko 
Cc: Pascal Mazon 

---

v3 changes:

These changes were previously mistakenly made part of the previous patch
("ethdev: refine TPID handling in flow API") from which they were split
following Andrew's rightful comment [1].

[1] http://dpdk.org/ml/archives/dev/2018-April/095870.html
---
 doc/guides/prog_guide/rte_flow.rst | 2 +-
 lib/librte_ether/rte_flow.h| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index fd317b48c..c62a80566 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -806,7 +806,7 @@ preceding pattern item.
 
 - ``tci``: tag control information.
 - ``inner_type``: inner EtherType or TPID.
-- Default ``mask`` matches TCI only.
+- Default ``mask`` matches the VID part of TCI only (lower 12 bits).
 
 Item: ``IPV4``
 ^^
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index 8e50384d0..513734dce 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -493,7 +493,7 @@ struct rte_flow_item_vlan {
 /** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */
 #ifndef __cplusplus
 static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = {
-   .tci = RTE_BE16(0x),
+   .tci = RTE_BE16(0x0fff),
.inner_type = RTE_BE16(0x),
 };
 #endif
-- 
2.11.0


[dpdk-dev] [PATCH v5 09/16] ethdev: add encap level to RSS flow API action

2018-04-19 Thread Adrien Mazarguil
RSS hash types (ETH_RSS_* macros defined in rte_ethdev.h) describe the
protocol header fields of a packet that must be taken into account while
computing RSS.

When facing encapsulated (e.g. tunneled) packets, there is an ambiguity as
to whether these should apply to inner or outer packets. Applications need
the ability to tell exactly "where" RSS must be performed.

This is addressed by adding encapsulation level information to the RSS flow
action. Its default value is 0 and stands for the usual unspecified
behavior. Other values provide a specific encapsulation level.

Contrary to the change announced by commit 676b605182a5 ("doc: announce
ethdev API change for RSS configuration"), this patch does not affect
struct rte_eth_rss_conf but struct rte_flow_action_rss as the former is not
used anymore by the RSS flow action. ABI impact is therefore limited to
rte_flow.

This breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
Cc: Xueming Li 
Cc: Ferruh Yigit 
Cc: Thomas Monjalon 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
Cc: Beilei Xing 
Cc: Qi Zhang 
Cc: Konstantin Ananyev 
Cc: Nelio Laranjeiro 
Cc: Yongseok Koh 
Cc: Andrew Rybchenko 
Cc: Pascal Mazon 
---
 app/test-pmd/cmdline_flow.c | 13 
 app/test-pmd/config.c   |  1 +
 doc/guides/prog_guide/rte_flow.rst  | 24 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  2 ++
 drivers/net/e1000/igb_flow.c|  4 
 drivers/net/e1000/igb_rxtx.c|  2 ++
 drivers/net/i40e/i40e_ethdev.c  |  2 ++
 drivers/net/i40e/i40e_flow.c|  4 
 drivers/net/ixgbe/ixgbe_flow.c  |  4 
 drivers/net/ixgbe/ixgbe_rxtx.c  |  2 ++
 drivers/net/mlx4/mlx4_flow.c|  6 ++
 drivers/net/mlx5/mlx5_flow.c| 11 ++
 drivers/net/sfc/sfc_flow.c  |  3 +++
 drivers/net/tap/tap_flow.c  |  6 +-
 lib/librte_ether/rte_flow.c |  1 +
 lib/librte_ether/rte_flow.h | 26 
 16 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 7436e0356..976fde7cd 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -167,6 +167,7 @@ enum index {
ACTION_COUNT,
ACTION_RSS,
ACTION_RSS_FUNC,
+   ACTION_RSS_LEVEL,
ACTION_RSS_FUNC_DEFAULT,
ACTION_RSS_FUNC_TOEPLITZ,
ACTION_RSS_FUNC_SIMPLE_XOR,
@@ -638,6 +639,7 @@ static const enum index action_queue[] = {
 
 static const enum index action_rss[] = {
ACTION_RSS_FUNC,
+   ACTION_RSS_LEVEL,
ACTION_RSS_TYPES,
ACTION_RSS_KEY,
ACTION_RSS_KEY_LEN,
@@ -1616,6 +1618,16 @@ static const struct token token_list[] = {
.help = "simple XOR hash function",
.call = parse_vc_action_rss_func,
},
+   [ACTION_RSS_LEVEL] = {
+   .name = "level",
+   .help = "encapsulation level for \"types\"",
+   .next = NEXT(action_rss, NEXT_ENTRY(UNSIGNED)),
+   .args = ARGS(ARGS_ENTRY_ARB
+(offsetof(struct action_rss_data, conf) +
+ offsetof(struct rte_flow_action_rss, level),
+ sizeof(((struct rte_flow_action_rss *)0)->
+level))),
+   },
[ACTION_RSS_TYPES] = {
.name = "types",
.help = "specific RSS hash types",
@@ -2107,6 +2119,7 @@ parse_vc_action_rss(struct context *ctx, const struct 
token *token,
*action_rss_data = (struct action_rss_data){
.conf = (struct rte_flow_action_rss){
.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+   .level = 0,
.types = rss_hf,
.key_len = sizeof(action_rss_data->key),
.queue_num = RTE_MIN(nb_rxq, ACTION_RSS_QUEUE_NUM),
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 19e27a6ca..562fb2f8d 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1101,6 +1101,7 @@ flow_action_conf_copy(void *buf, const struct 
rte_flow_action *action)
if (dst.rss)
*dst.rss = (struct rte_flow_action_rss){
.func = src.rss->func,
+   .level = src.rss->level,
.types = src.rss->types,
.key_len = src.rss->key_len,
.queue_num = src.rss->queue_num,
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index e0c68495c..1a09e8a0f 100644
--- a/doc/guides/prog_guide/rte_flow.r

[dpdk-dev] [PATCH v5 10/16] ethdev: refine TPID handling in flow API

2018-04-19 Thread Adrien Mazarguil
TPID handling in rte_flow VLAN and E_TAG pattern item definitions is not
consistent with the normal stacking order of pattern items, which is
confusing to applications.

Problem is that when followed by one of these layers, the EtherType field
of the preceding layer keeps its "inner" definition, and the "outer" TPID
is provided by the subsequent layer, the reverse of how a packet looks
on the wire:

 Wire: [ ETH TPID = A | VLAN EtherType = B | B DATA ]
 rte_flow: [ ETH EtherType = B | VLAN TPID = A | B DATA ]

Worse, when QinQ is involved, the stacking order of VLAN layers is
unspecified. It is unclear whether it should be reversed (innermost to
outermost) as well given TPID applies to the previous layer:

 Wire:   [ ETH TPID = A | VLAN TPID = B | VLAN EtherType = C | C DATA ]
 rte_flow 1: [ ETH EtherType = C | VLAN TPID = B | VLAN TPID = A | C DATA ]
 rte_flow 2: [ ETH EtherType = C | VLAN TPID = A | VLAN TPID = B | C DATA ]

While specifying EtherType/TPID is hopefully rarely necessary, the stacking
order in case of QinQ and the lack of documentation remain an issue.

This patch replaces TPID in the VLAN pattern item with an inner
EtherType/TPID as is usually done everywhere else (e.g. struct vlan_hdr),
clarifies documentation and updates all relevant code.

It breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Summary of changes for PMDs that implement ETH, VLAN or E_TAG pattern
items:

- bnxt: EtherType matching is supported with and without VLAN, but TPID
  matching is not and triggers an error.

- e1000: EtherType matching is only supported with the ETHERTYPE filter,
  which does not support VLAN matching, therefore no impact.

- enic: same as bnxt.

- i40e: same as bnxt with existing FDIR limitations on allowed EtherType
  values. The remaining filter types (VXLAN, NVGRE, QINQ) do not support
  EtherType matching.

- ixgbe: same as e1000, with additional minor change to rely on the new
  E-Tag macro definition.

- mlx4: EtherType/TPID matching is not supported, no impact.

- mlx5: same as bnxt.

- mvpp2: same as bnxt.

- sfc: same as bnxt.

- tap: same as bnxt.

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
Cc: Ferruh Yigit 
Cc: Thomas Monjalon 
Cc: Wenzhuo Lu 
Cc: Jingjing Wu 
Cc: Ajit Khaparde 
Cc: Somnath Kotur 
Cc: John Daley 
Cc: Hyong Youb Kim 
Cc: Beilei Xing 
Cc: Qi Zhang 
Cc: Konstantin Ananyev 
Cc: Nelio Laranjeiro 
Cc: Yongseok Koh 
Cc: Tomasz Duszynski 
Cc: Dmitri Epshtein 
Cc: Natalie Samsonov 
Cc: Jianbo Liu 
Cc: Andrew Rybchenko 
Cc: Pascal Mazon 

---

v3 changes:

Updated mrvl to mvpp2.

Moved unrelated default TCI mask update to separate patch.

Fixed sfc according to Andrew's comments [1], which made so much sense that
I standardized on the same behavior for all other PMDs: matching outer TPID
is never supported when a VLAN pattern item is present.

This is done because many devices accept several TPIDs but do not provide
means to match a given one explicitly, it's all or nothing, and that makes
the resulting flow rule inaccurate.

[1] http://dpdk.org/ml/archives/dev/2018-April/095870.html
---
 app/test-pmd/cmdline_flow.c | 17 +++
 doc/guides/nics/tap.rst |  2 +-
 doc/guides/prog_guide/rte_flow.rst  | 19 ++--
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  4 +-
 drivers/net/bnxt/bnxt_filter.c  | 35 +++---
 drivers/net/enic/enic_flow.c| 19 +---
 drivers/net/i40e/i40e_flow.c| 60 
 drivers/net/ixgbe/ixgbe_ethdev.c|  3 +-
 drivers/net/mlx5/mlx5_flow.c| 13 -
 drivers/net/mvpp2/mrvl_flow.c   | 26 +++---
 drivers/net/sfc/sfc_flow.c  | 18 +++
 drivers/net/tap/tap_flow.c  | 14 --
 lib/librte_ether/rte_flow.h | 22 ++---
 lib/librte_net/rte_ether.h  |  1 +
 14 files changed, 198 insertions(+), 55 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 976fde7cd..f8f2a559e 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -99,11 +99,11 @@ enum index {
ITEM_ETH_SRC,
ITEM_ETH_TYPE,
ITEM_VLAN,
-   ITEM_VLAN_TPID,
ITEM_VLAN_TCI,
ITEM_VLAN_PCP,
ITEM_VLAN_DEI,
ITEM_VLAN_VID,
+   ITEM_VLAN_INNER_TYPE,
ITEM_IPV4,
ITEM_IPV4_TOS,
ITEM_IPV4_TTL,
@@ -505,11 +505,11 @@ static const enum index item_eth[] = {
 };
 
 static const enum index item_vlan[] = {
-   ITEM_VLAN_TPID,
ITEM_VLAN_TCI,
ITEM_VLAN_PCP,
ITEM_VLAN_DEI,
ITEM_VLAN_VID,
+   ITEM_VLAN_INNER_TYPE,
ITEM_NEXT,
ZERO,
 };
@@ -1142,12 +1142,6 @@ static const struct token token_list[] = {
.next = NEXT(item_vlan),
.call = parse_vc,
},
-  

[dpdk-dev] [PATCH v5 12/16] ethdev: add transfer attribute to flow API

2018-04-19 Thread Adrien Mazarguil
This new attribute enables applications to create flow rules that do not
simply match traffic whose origin is specified in the pattern (e.g. some
non-default physical port or VF), but actively affect it by applying the
flow rule at the lowest possible level in the underlying device.

It breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Cc: Andrew Rybchenko 

---

v3 changes:

Clarified definition for ingress and egress following Andrew's comment on
subsequent patch.

[1] http://dpdk.org/ml/archives/dev/2018-April/095961.html
---
 app/test-pmd/cmdline_flow.c | 11 +
 app/test-pmd/config.c   |  6 ++-
 doc/guides/prog_guide/rte_flow.rst  | 26 +++-
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 11 ++---
 drivers/net/bnxt/bnxt_filter.c  |  8 
 drivers/net/e1000/igb_flow.c| 44 
 drivers/net/enic/enic_flow.c|  6 +++
 drivers/net/i40e/i40e_flow.c|  8 
 drivers/net/ixgbe/ixgbe_flow.c  | 53 
 drivers/net/mlx4/mlx4_flow.c|  4 ++
 drivers/net/mlx5/mlx5_flow.c|  7 
 drivers/net/mvpp2/mrvl_flow.c   |  6 +++
 drivers/net/sfc/sfc_flow.c  |  6 +++
 drivers/net/tap/tap_flow.c  |  6 +++
 lib/librte_ether/rte_flow.h | 22 +-
 15 files changed, 215 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index f8f2a559e..1c6b5a112 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -69,6 +69,7 @@ enum index {
PRIORITY,
INGRESS,
EGRESS,
+   TRANSFER,
 
/* Validate/create pattern. */
PATTERN,
@@ -407,6 +408,7 @@ static const enum index next_vc_attr[] = {
PRIORITY,
INGRESS,
EGRESS,
+   TRANSFER,
PATTERN,
ZERO,
 };
@@ -960,6 +962,12 @@ static const struct token token_list[] = {
.next = NEXT(next_vc_attr),
.call = parse_vc,
},
+   [TRANSFER] = {
+   .name = "transfer",
+   .help = "apply rule directly to endpoints found in pattern",
+   .next = NEXT(next_vc_attr),
+   .call = parse_vc,
+   },
/* Validate/create pattern. */
[PATTERN] = {
.name = "pattern",
@@ -1945,6 +1953,9 @@ parse_vc(struct context *ctx, const struct token *token,
case EGRESS:
out->args.vc.attr.egress = 1;
return len;
+   case TRANSFER:
+   out->args.vc.attr.transfer = 1;
+   return len;
case PATTERN:
out->args.vc.pattern =
(void *)RTE_ALIGN_CEIL((uintptr_t)(out + 1),
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 562fb2f8d..a50a5c544 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1239,6 +1239,7 @@ port_flow_complain(struct rte_flow_error *error)
[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+   [RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER] = "transfer field",
[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
[RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification",
@@ -1504,12 +1505,13 @@ port_flow_list(portid_t port_id, uint32_t n, const 
uint32_t group[n])
const struct rte_flow_item *item = pf->pattern;
const struct rte_flow_action *action = pf->actions;
 
-   printf("%" PRIu32 "\t%" PRIu32 "\t%" PRIu32 "\t%c%c\t",
+   printf("%" PRIu32 "\t%" PRIu32 "\t%" PRIu32 "\t%c%c%c\t",
   pf->id,
   pf->attr.group,
   pf->attr.priority,
   pf->attr.ingress ? 'i' : '-',
-  pf->attr.egress ? 'e' : '-');
+  pf->attr.egress ? 'e' : '-',
+  pf->attr.transfer ? 't' : '-');
while (item->type != RTE_FLOW_ITEM_TYPE_END) {
if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
printf("%s ", flow_item[item->type].name);
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index c62a80566..550a4c95b 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -170,7 +170,13 @@ Note that support for more than a single priority level is 
not guaranteed.
 Attribute: Traffic direction
 
 
-Flow rules can apply to inbound and/or outbound traffic (ingress/egress).
+Flow ru

[dpdk-dev] [PATCH v5 14/16] ethdev: rename physical port item in flow API

2018-04-19 Thread Adrien Mazarguil
While RTE_FLOW_ITEM_TYPE_PORT refers to physical ports of the underlying
device using specific identifiers, these are often confused with DPDK port
IDs exposed to applications in the global name space.

Since this pattern item is seldom used, rename it RTE_FLOW_ITEM_PHY_PORT
for better clarity.

No ABI impact.

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
---
 app/test-pmd/cmdline_flow.c | 27 +++--
 app/test-pmd/config.c   |  2 +-
 doc/guides/prog_guide/rte_flow.rst  | 22 -
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  2 +-
 lib/librte_ether/rte_flow.c |  2 +-
 lib/librte_ether/rte_flow.h | 31 ++--
 6 files changed, 41 insertions(+), 45 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 41103de67..f9f937277 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -87,8 +87,8 @@ enum index {
ITEM_PF,
ITEM_VF,
ITEM_VF_ID,
-   ITEM_PORT,
-   ITEM_PORT_INDEX,
+   ITEM_PHY_PORT,
+   ITEM_PHY_PORT_INDEX,
ITEM_RAW,
ITEM_RAW_RELATIVE,
ITEM_RAW_SEARCH,
@@ -441,7 +441,7 @@ static const enum index next_item[] = {
ITEM_ANY,
ITEM_PF,
ITEM_VF,
-   ITEM_PORT,
+   ITEM_PHY_PORT,
ITEM_RAW,
ITEM_ETH,
ITEM_VLAN,
@@ -482,8 +482,8 @@ static const enum index item_vf[] = {
ZERO,
 };
 
-static const enum index item_port[] = {
-   ITEM_PORT_INDEX,
+static const enum index item_phy_port[] = {
+   ITEM_PHY_PORT_INDEX,
ITEM_NEXT,
ZERO,
 };
@@ -1059,18 +1059,19 @@ static const struct token token_list[] = {
.next = NEXT(item_vf, NEXT_ENTRY(UNSIGNED), item_param),
.args = ARGS(ARGS_ENTRY(struct rte_flow_item_vf, id)),
},
-   [ITEM_PORT] = {
-   .name = "port",
-   .help = "device-specific physical port index to use",
-   .priv = PRIV_ITEM(PORT, sizeof(struct rte_flow_item_port)),
-   .next = NEXT(item_port),
+   [ITEM_PHY_PORT] = {
+   .name = "phy_port",
+   .help = "match traffic from/to a specific physical port",
+   .priv = PRIV_ITEM(PHY_PORT,
+ sizeof(struct rte_flow_item_phy_port)),
+   .next = NEXT(item_phy_port),
.call = parse_vc,
},
-   [ITEM_PORT_INDEX] = {
+   [ITEM_PHY_PORT_INDEX] = {
.name = "index",
.help = "physical port index",
-   .next = NEXT(item_port, NEXT_ENTRY(UNSIGNED), item_param),
-   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_port, index)),
+   .next = NEXT(item_phy_port, NEXT_ENTRY(UNSIGNED), item_param),
+   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_phy_port, index)),
},
[ITEM_RAW] = {
.name = "raw",
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index a50a5c544..840320108 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -976,7 +976,7 @@ static const struct {
MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
MK_FLOW_ITEM(PF, 0),
MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
-   MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
+   MK_FLOW_ITEM(PHY_PORT, sizeof(struct rte_flow_item_phy_port)),
MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)),
MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index a0a124aa2..4e053c24b 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -195,8 +195,8 @@ When supported, this effectively enables an application to 
reroute traffic
 not necessarily intended for it (e.g. coming from or addressed to different
 physical ports, VFs or applications) at the device level.
 
-It complements the behavior of some pattern items such as `Item: PORT`_ and
-is meaningless without them.
+It complements the behavior of some pattern items such as `Item: PHY_PORT`_
+and is meaningless without them.
 
 When transferring flow rules, **ingress** and **egress** attributes
 (`Attribute: Traffic direction`_) keep their original meaning, as if
@@ -583,15 +583,15 @@ separate entities, should be addressed through their own 
DPDK port IDs.
| ``mask`` | ``id``   | zeroed to match any VF ID |
+--+--+---+
 
-Item: ``PORT``
-^^
+Item: ``PHY_PORT``
+^^
 
-Matches packets coming from the specified physical port of the underlying
-device.
+Matches traffic originating from (ingress) or going to (egress) a physical
+port of the underlying device.
 
-The first PORT item overrides the physical p

[dpdk-dev] [PATCH v5 13/16] ethdev: update behavior of VF/PF in flow API

2018-04-19 Thread Adrien Mazarguil
Contrary to all other pattern items, these are inconsistently documented as
affecting traffic instead of simply matching its origin, without provision
for the latter.

This commit clarifies documentation and updates PMDs since the original
behavior now has to be explicitly requested using the new transfer
attribute.

It breaks ABI compatibility for the following public functions:

- rte_flow_create()
- rte_flow_validate()

Impacted PMDs are bnxt and i40e, for which the VF pattern item is now only
supported when a transfer attribute is also present.

Signed-off-by: Adrien Mazarguil 
Cc: Ajit Khaparde 
Cc: Somnath Kotur 
Cc: Beilei Xing 
Cc: Qi Zhang 
---
 app/test-pmd/cmdline_flow.c | 12 +++---
 doc/guides/prog_guide/rte_flow.rst  | 36 +-
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 12 +++---
 drivers/net/bnxt/bnxt_filter.c  | 22 ++-
 drivers/net/i40e/i40e_flow.c| 23 +++-
 lib/librte_ether/rte_flow.h | 47 ++--
 6 files changed, 77 insertions(+), 75 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 1c6b5a112..41103de67 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -1041,21 +1041,21 @@ static const struct token token_list[] = {
},
[ITEM_PF] = {
.name = "pf",
-   .help = "match packets addressed to the physical function",
+   .help = "match traffic from/to the physical function",
.priv = PRIV_ITEM(PF, 0),
.next = NEXT(NEXT_ENTRY(ITEM_NEXT)),
.call = parse_vc,
},
[ITEM_VF] = {
.name = "vf",
-   .help = "match packets addressed to a virtual function ID",
+   .help = "match traffic from/to a virtual function ID",
.priv = PRIV_ITEM(VF, sizeof(struct rte_flow_item_vf)),
.next = NEXT(item_vf),
.call = parse_vc,
},
[ITEM_VF_ID] = {
.name = "id",
-   .help = "destination VF ID",
+   .help = "VF ID",
.next = NEXT(item_vf, NEXT_ENTRY(UNSIGNED), item_param),
.args = ARGS(ARGS_ENTRY(struct rte_flow_item_vf, id)),
},
@@ -1686,14 +1686,14 @@ static const struct token token_list[] = {
},
[ACTION_PF] = {
.name = "pf",
-   .help = "redirect packets to physical device function",
+   .help = "direct traffic to physical function",
.priv = PRIV_ACTION(PF, 0),
.next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
.call = parse_vc,
},
[ACTION_VF] = {
.name = "vf",
-   .help = "redirect packets to virtual device function",
+   .help = "direct traffic to a virtual function ID",
.priv = PRIV_ACTION(VF, sizeof(struct rte_flow_action_vf)),
.next = NEXT(action_vf),
.call = parse_vc,
@@ -1708,7 +1708,7 @@ static const struct token token_list[] = {
},
[ACTION_VF_ID] = {
.name = "id",
-   .help = "VF ID to redirect packets to",
+   .help = "VF ID",
.next = NEXT(action_vf, NEXT_ENTRY(UNSIGNED)),
.args = ARGS(ARGS_ENTRY(struct rte_flow_action_vf, id)),
.call = parse_vc_conf,
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 550a4c95b..a0a124aa2 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -528,15 +528,12 @@ Usage example, matching non-TCPv4 packets only:
 Item: ``PF``
 
 
-Matches packets addressed to the physical function of the device.
+Matches traffic originating from (ingress) or going to (egress) the physical
+function of the current device.
 
-If the underlying device function differs from the one that would normally
-receive the matched traffic, specifying this item prevents it from reaching
-that device unless the flow rule contains a `Action: PF`_. Packets are not
-duplicated between device instances by default.
+If supported, should work even if the physical function is not managed by
+the application and thus not associated with a DPDK port ID.
 
-- Likely to return an error or never match any traffic if applied to a VF
-  device.
 - Can be combined with any number of `Item: VF`_ to match both PF and VF
   traffic.
 - ``spec``, ``last`` and ``mask`` must not be set.
@@ -558,15 +555,15 @@ duplicated between device instances by default.
 Item: ``VF``
 
 
-Matches packets addressed to a virtual function ID of the device.
+Matches traffic originating from (ingress) or going to (egress) a given
+virtual function of the current device.
 
-If the underlying device function differs from the one that would normally
-receive the matched traffic, speci

[dpdk-dev] [PATCH v5 15/16] ethdev: add physical port action to flow API

2018-04-19 Thread Adrien Mazarguil
This patch adds the missing action counterpart to the PHY_PORT pattern
item, that is, the ability to directly inject matching traffic into a
physical port of the underlying device.

It breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Acked-by: Andrew Rybchenko 
Acked-by: Mohammad Abdul Awal 
Cc: "Zhang, Qi Z" 
---
 app/test-pmd/cmdline_flow.c | 35 
 app/test-pmd/config.c   |  1 +
 doc/guides/prog_guide/rte_flow.rst  | 20 ++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  5 
 lib/librte_ether/rte_flow.c |  1 +
 lib/librte_ether/rte_flow.h | 22 +++
 6 files changed, 84 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index f9f937277..356714801 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -182,6 +182,9 @@ enum index {
ACTION_VF,
ACTION_VF_ORIGINAL,
ACTION_VF_ID,
+   ACTION_PHY_PORT,
+   ACTION_PHY_PORT_ORIGINAL,
+   ACTION_PHY_PORT_INDEX,
ACTION_METER,
ACTION_METER_ID,
 };
@@ -623,6 +626,7 @@ static const enum index next_action[] = {
ACTION_RSS,
ACTION_PF,
ACTION_VF,
+   ACTION_PHY_PORT,
ACTION_METER,
ZERO,
 };
@@ -657,6 +661,13 @@ static const enum index action_vf[] = {
ZERO,
 };
 
+static const enum index action_phy_port[] = {
+   ACTION_PHY_PORT_ORIGINAL,
+   ACTION_PHY_PORT_INDEX,
+   ACTION_NEXT,
+   ZERO,
+};
+
 static const enum index action_meter[] = {
ACTION_METER_ID,
ACTION_NEXT,
@@ -1714,6 +1725,30 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY(struct rte_flow_action_vf, id)),
.call = parse_vc_conf,
},
+   [ACTION_PHY_PORT] = {
+   .name = "phy_port",
+   .help = "direct packets to physical port index",
+   .priv = PRIV_ACTION(PHY_PORT,
+   sizeof(struct rte_flow_action_phy_port)),
+   .next = NEXT(action_phy_port),
+   .call = parse_vc,
+   },
+   [ACTION_PHY_PORT_ORIGINAL] = {
+   .name = "original",
+   .help = "use original port index if possible",
+   .next = NEXT(action_phy_port, NEXT_ENTRY(BOOLEAN)),
+   .args = ARGS(ARGS_ENTRY_BF(struct rte_flow_action_phy_port,
+  original, 1)),
+   .call = parse_vc_conf,
+   },
+   [ACTION_PHY_PORT_INDEX] = {
+   .name = "index",
+   .help = "physical port index",
+   .next = NEXT(action_phy_port, NEXT_ENTRY(UNSIGNED)),
+   .args = ARGS(ARGS_ENTRY(struct rte_flow_action_phy_port,
+   index)),
+   .call = parse_vc_conf,
+   },
[ACTION_METER] = {
.name = "meter",
.help = "meter the directed packets at given id",
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 840320108..2d68f1fb0 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1074,6 +1074,7 @@ static const struct {
MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)),
MK_FLOW_ACTION(PF, 0),
MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
+   MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)),
MK_FLOW_ACTION(METER, sizeof(struct rte_flow_action_meter)),
 };
 
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 4e053c24b..a39c1e1b0 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1433,6 +1433,26 @@ See `Item: VF`_.
| ``id``   | VF ID  |
+--++
 
+Action: ``PHY_PORT``
+
+
+Directs matching traffic to a given physical port index of the underlying
+device.
+
+See `Item: PHY_PORT`_.
+
+.. _table_rte_flow_action_phy_port:
+
+.. table:: PHY_PORT
+
+   +--+-+
+   | Field| Value   |
+   +==+=+
+   | ``original`` | use original port index if possible |
+   +--+-+
+   | ``index``| physical port index |
+   +--+-+
+
 Action: ``METER``
 ^
 
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index a2bbd1930..64d8dfddb 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -3423,6 +3423,11 @@ This section lists supported act

[dpdk-dev] [PATCH v5 16/16] ethdev: add port ID item and action to flow API

2018-04-19 Thread Adrien Mazarguil
RTE_FLOW_ACTION_TYPE_PORT_ID brings the ability to inject matching traffic
into a different device, as identified by its DPDK port ID.

This is normally only supported when the target port ID has some kind of
relationship with the port ID the flow rule is created against, such as
being exposed by a common physical device (e.g. a different port of an
Ethernet switch).

The converse pattern item, RTE_FLOW_ITEM_TYPE_PORT_ID, makes the resulting
flow rule match traffic whose origin is the specified port ID. Note that
specifying a port ID that differs from the one the flow rule is created
against is normally meaningless (if even accepted), but can make sense if
combined with the transfer attribute.

These must not be confused with their PHY_PORT counterparts, which refer to
physical ports using device-specific indices, but unlike PORT_ID are not
necessarily tied to DPDK port IDs.

This breaks ABI compatibility for the following public functions:

- rte_flow_copy()
- rte_flow_create()
- rte_flow_query()
- rte_flow_validate()

Signed-off-by: Adrien Mazarguil 
Reviewed-by: Qi Zhang 
Acked-by: Andrew Rybchenko 
Cc: "Zhang, Qi Z" 
Cc: Declan Doherty 

---

This patch provides the same functionality and supersedes Qi Zhang's
"ether: add flow action to redirect packet to a port" [1].

The main differences are:

- Action is named PORT_ID instead of PORT.
- Addition of a PORT_ID pattern item.
- More extensive documentation.
- Testpmd support.
- rte_flow_copy() support.

[1] http://dpdk.org/ml/archives/dev/2018-April/094648.html
---
 app/test-pmd/cmdline_flow.c | 57 
 app/test-pmd/config.c   |  2 +
 doc/guides/prog_guide/rte_flow.rst  | 48 
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  9 
 lib/librte_ether/rte_flow.c |  2 +
 lib/librte_ether/rte_flow.h | 56 +++
 6 files changed, 174 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 356714801..32fe6645a 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -89,6 +89,8 @@ enum index {
ITEM_VF_ID,
ITEM_PHY_PORT,
ITEM_PHY_PORT_INDEX,
+   ITEM_PORT_ID,
+   ITEM_PORT_ID_ID,
ITEM_RAW,
ITEM_RAW_RELATIVE,
ITEM_RAW_SEARCH,
@@ -185,6 +187,9 @@ enum index {
ACTION_PHY_PORT,
ACTION_PHY_PORT_ORIGINAL,
ACTION_PHY_PORT_INDEX,
+   ACTION_PORT_ID,
+   ACTION_PORT_ID_ORIGINAL,
+   ACTION_PORT_ID_ID,
ACTION_METER,
ACTION_METER_ID,
 };
@@ -445,6 +450,7 @@ static const enum index next_item[] = {
ITEM_PF,
ITEM_VF,
ITEM_PHY_PORT,
+   ITEM_PORT_ID,
ITEM_RAW,
ITEM_ETH,
ITEM_VLAN,
@@ -491,6 +497,12 @@ static const enum index item_phy_port[] = {
ZERO,
 };
 
+static const enum index item_port_id[] = {
+   ITEM_PORT_ID_ID,
+   ITEM_NEXT,
+   ZERO,
+};
+
 static const enum index item_raw[] = {
ITEM_RAW_RELATIVE,
ITEM_RAW_SEARCH,
@@ -627,6 +639,7 @@ static const enum index next_action[] = {
ACTION_PF,
ACTION_VF,
ACTION_PHY_PORT,
+   ACTION_PORT_ID,
ACTION_METER,
ZERO,
 };
@@ -668,6 +681,13 @@ static const enum index action_phy_port[] = {
ZERO,
 };
 
+static const enum index action_port_id[] = {
+   ACTION_PORT_ID_ORIGINAL,
+   ACTION_PORT_ID_ID,
+   ACTION_NEXT,
+   ZERO,
+};
+
 static const enum index action_meter[] = {
ACTION_METER_ID,
ACTION_NEXT,
@@ -1084,6 +1104,20 @@ static const struct token token_list[] = {
.next = NEXT(item_phy_port, NEXT_ENTRY(UNSIGNED), item_param),
.args = ARGS(ARGS_ENTRY(struct rte_flow_item_phy_port, index)),
},
+   [ITEM_PORT_ID] = {
+   .name = "port_id",
+   .help = "match traffic from/to a given DPDK port ID",
+   .priv = PRIV_ITEM(PORT_ID,
+ sizeof(struct rte_flow_item_port_id)),
+   .next = NEXT(item_port_id),
+   .call = parse_vc,
+   },
+   [ITEM_PORT_ID_ID] = {
+   .name = "id",
+   .help = "DPDK port ID",
+   .next = NEXT(item_port_id, NEXT_ENTRY(UNSIGNED), item_param),
+   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_port_id, id)),
+   },
[ITEM_RAW] = {
.name = "raw",
.help = "match an arbitrary byte string",
@@ -1749,6 +1783,29 @@ static const struct token token_list[] = {
index)),
.call = parse_vc_conf,
},
+   [ACTION_PORT_ID] = {
+   .name = "port_id",
+   .help = "direct matching traffic to a given DPDK port ID",
+   .priv = PRIV_ACTION(PORT_ID,
+   sizeof(struct rte_flow_action_port_id)),
+   .nex

Re: [dpdk-dev] [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow

2018-04-19 Thread Xueming(Steven) Li


> -Original Message-
> From: Nélio Laranjeiro 
> Sent: Thursday, April 19, 2018 2:56 PM
> To: Xueming(Steven) Li 
> Cc: Shahaf Shuler ; dev@dpdk.org
> Subject: Re: [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow
> 
> On Thu, Apr 19, 2018 at 06:20:50AM +, Xueming(Steven) Li wrote:
> >
> >
> > > -Original Message-
> > > From: Nélio Laranjeiro 
> > > Sent: Wednesday, April 18, 2018 11:09 PM
> > > To: Xueming(Steven) Li 
> > > Cc: Shahaf Shuler ; dev@dpdk.org
> > > Subject: Re: [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow
> > >
> > > On Wed, Apr 18, 2018 at 02:43:30PM +, Xueming(Steven) Li wrote:
> > > >
> > > >
> > > > > -Original Message-
> > > > > From: Nélio Laranjeiro 
> > > > > Sent: Wednesday, April 18, 2018 2:49 PM
> > > > > To: Xueming(Steven) Li 
> > > > > Cc: Shahaf Shuler ; dev@dpdk.org
> > > > > Subject: Re: [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow
> > > > >
> > > > > On Tue, Apr 17, 2018 at 11:14:28PM +0800, Xueming Li wrote:
> > > > > > This patch support L3 VXLAN, no inner L2 header comparing to
> > > > > > standard VXLAN protocol. L3 VXLAN using specific overlay UDP
> > > > > > destination port to discriminate against standard VXLAN, FW
> > > > > > has to be configured to support
> > > > > > it:
> > > > > >   sudo mlxconfig -d  -y s IP_OVER_VXLAN_EN=1
> > > > > >   sudo mlxconfig -d  -y s IP_OVER_VXLAN_PORT=
> > > > > >
> > > > > > Signed-off-by: Xueming Li 
> > > > > > ---
> > > > > >  drivers/net/mlx5/mlx5_flow.c | 4 +++-
> > > > > >  1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/drivers/net/mlx5/mlx5_flow.c
> > > > > > b/drivers/net/mlx5/mlx5_flow.c index 771d5f14d..d7a921dff
> > > > > > 100644
> > > > > > --- a/drivers/net/mlx5/mlx5_flow.c
> > > > > > +++ b/drivers/net/mlx5/mlx5_flow.c
> > > > > > @@ -413,7 +413,9 @@ static const struct mlx5_flow_items 
> > > > > > mlx5_flow_items[] = {
> > > > > > .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
> > > > > > },
> > > > > > [RTE_FLOW_ITEM_TYPE_VXLAN] = {
> > > > > > -   .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
> > > > > > +   .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
> > > > > > +  RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 
> > > > > > VXLAN. */
> > > > > > +  RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 
> > > > > > VXLAN. */
> > > > > > .actions = valid_actions,
> > > > > > .mask = &(const struct rte_flow_item_vxlan){
> > > > > > .vni = "\xff\xff\xff",
> > > > > > --
> > > > > > 2.13.3
> > > > >
> > > > > Such support must be under device parameter has it depends on
> > > > > the configuration of the firmware.  If the firmware is not
> > > > > correctly configured the PMD must refuse
> > > such rule.
> > > > >
> > > > > Thanks,
> > > > >
> > > > > --
> > > > > Nélio Laranjeiro
> > > > > 6WIND
> > > >
> > > > Are you suggesting Verbs parameter? I'm afraid we can't have it in
> > > > short time, need new patch in later release when Verbs ready.
> > >
> > > Take a look at [1], this is what I mean.
> >
> > Enabling a new device parameter can't make L3 VXLAN packets get
> > received if the firmware configuration is not set.
> 
> So you expect than the user will enable a feature without reading the PMD 
> documentation?
> If it is the case, the answer it pretty simple, it is the same as above, read 
> the PMD documentation.
> 
> > On the other hand, if the firmware configuration is enabled and the device
> > parameter is not set, packets could be received but rule creation would fail.
> 
> Again a user using a NIC should read the documentation.

If a user reads the document, the firmware should be configured correctly to
enable this feature.

> 
> > I'm afraid that a device parameter will introduce complexity of using
> > this feature w/o real benefits.
> 
> Add this missing device parameter and update accordingly the documentation, 
> or wait for Verbs to add
> the missing query feature.
> 
> If the firmware it not configured this rule must be refused, as there is no 
> way in the PMD to know if
> the firmware is configured, it must rely on a device parameter.

Let's keep the design simple: users know exactly what they are doing and should
not expect such a flow to work merely from reading the document.

> 
> Regards,
> 
> --
> Nélio Laranjeiro
> 6WIND


Re: [dpdk-dev] [dpdk-stable] [PATCH v5 10/11] ethdev: fix missing boolean values in flow command

2018-04-19 Thread Adrien Mazarguil
On Wed, Apr 18, 2018 at 06:16:18PM +0100, Ferruh Yigit wrote:
> On 4/17/2018 9:25 AM, Adrien Mazarguil wrote:
> > On Mon, Apr 16, 2018 at 07:17:44PM +, Matan Azrad wrote:
> >> Hi Adrien
> >>
> >> I think the title should be "app/testpmd: ..." ...
> > 
> > Indeed, I'll update it in the next iteration, thanks.
> 
> If this is the only issue, it can be fixed while applying.
> Are there more outstanding issues, will there be a new version of the set?

Yes, submitted after rebasing it [1].

[1] http://dpdk.org/ml/archives/dev/2018-April/098035.html

-- 
Adrien Mazarguil
6WIND


[dpdk-dev] [PATCH] crypto/mrvl: update maintainers

2018-04-19 Thread Tomasz Duszynski
Jacek will no longer be maintaining mrvl crypto PMD.
Special thanks to him for development and support.

Signed-off-by: Tomasz Duszynski 
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index fe7c59d..0dba588 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -724,7 +724,6 @@ F: doc/guides/cryptodevs/kasumi.rst
 F: doc/guides/cryptodevs/features/kasumi.ini

 Marvell Mrvl
-M: Jacek Siuda 
 M: Tomasz Duszynski 
 M: Dmitri Epshtein 
 M: Natalie Samsonov 
--
2.7.4



[dpdk-dev] [PATCH] net/mvpp2: update maintainers

2018-04-19 Thread Tomasz Duszynski
Jacek will no longer be maintaining mvpp2 net PMD.
Special thanks to him for his development and support.

Signed-off-by: Tomasz Duszynski 
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f43e3fe..fe7c59d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -534,7 +534,6 @@ F: doc/guides/nics/mlx5.rst
 F: doc/guides/nics/features/mlx5.ini

 Marvell mvpp2
-M: Jacek Siuda 
 M: Tomasz Duszynski 
 M: Dmitri Epshtein 
 M: Natalie Samsonov 
--
2.7.4



Re: [dpdk-dev] [PATCH v2 1/2] net/pcap: physical interface MAC support

2018-04-19 Thread Ferruh Yigit
On 4/19/2018 6:16 AM, Kuusisaari, Juhamatti wrote:
 Why a build time config option for this? Can we make it a runtime devarg?
>>>
>>> Sure, we can make it a devarg. Or do we even need that? Are there a lot of
>> test dependencies that would need to be fixed if we have it enabled by
>> default?
>>
>> Not test dependencies but this may be overkill for some usecases, I prefer
>> making this dynamically configurable, no strong opinion though.
> 
> OK, I'll take a look at this and craft a new version.
>  
>>>
 Overall we are trying to reduce config options already and this seems
 no need to be build time option at all.

 btw, this is a little late in release cycle, so lets target this
 patch for next release.
>>>
>>> The patch is on top of net-next, this should be just fine.
>>
>> Perhaps we should rename the sub-tree :) because this is not happening first
>> time. next-net is not for next release, as it has been Linux, it is for this
>> release but just a sub-tree for net PMDs.
> 
> Aha, while reading the docs it says: "All sub-repositories are merged into 
> main repository for -rc1 and -rc2", so I kind of thought this sub-repo is 
> going to the next release, as you have rc2 out already.

rc2 is not out yet, we are still working for rc1, which should be soon.

> 
>>>
 Thanks,
 ferruh
>>>
> 
> Thanks,
> --
>  Juhamatti
> 



Re: [dpdk-dev] [PATCH v4 01/11] crypto: replace rte_panic instances in crypto driver

2018-04-19 Thread Trahe, Fiona
Hi Arnon,
Can you change subject to crypto/dpaa:... please as it's only affecting that 
driver.
Fiona

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Arnon Warshavsky
> Sent: Thursday, April 19, 2018 7:01 AM
> To: tho...@monjalon.net; Burakov, Anatoly ; Lu, 
> Wenzhuo
> ; Doherty, Declan ;
> jerin.ja...@caviumnetworks.com; Richardson, Bruce 
> ; Yigit, Ferruh
> 
> Cc: dev@dpdk.org; ar...@qwilt.com
> Subject: [dpdk-dev] [PATCH v4 01/11] crypto: replace rte_panic instances in 
> crypto driver
> 
> replace panic calls with log and return value.
> 
> --
> v2:
> - reformat error message to include literal string in a single line
> v4: replace -1 return value with -ENOMEM
> 
> Signed-off-by: Arnon Warshavsky 
> ---
>  drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 8 +---
>  drivers/crypto/dpaa_sec/dpaa_sec.c  | 8 +---
>  2 files changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
> b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
> index 23012e3..d465a2d 100644
> --- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
> +++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
> @@ -2861,9 +2861,11 @@ struct rte_security_ops dpaa2_sec_security_ops = {
>   RTE_CACHE_LINE_SIZE,
>   rte_socket_id());
> 
> - if (cryptodev->data->dev_private == NULL)
> - rte_panic("Cannot allocate memzone for private "
> -   "device data");
> + if (cryptodev->data->dev_private == NULL) {
> + RTE_LOG(ERR, PMD, "%s() Cannot allocate memzone for 
> private device data",
> + __func__);
> + return -ENOMEM;
> + }
>   }
> 
>   dpaa2_dev->cryptodev = cryptodev;
> diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.c 
> b/drivers/crypto/dpaa_sec/dpaa_sec.c
> index b685220..7b63650 100644
> --- a/drivers/crypto/dpaa_sec/dpaa_sec.c
> +++ b/drivers/crypto/dpaa_sec/dpaa_sec.c
> @@ -2356,9 +2356,11 @@ struct rte_security_ops dpaa_sec_security_ops = {
>   RTE_CACHE_LINE_SIZE,
>   rte_socket_id());
> 
> - if (cryptodev->data->dev_private == NULL)
> - rte_panic("Cannot allocate memzone for private "
> - "device data");
> + if (cryptodev->data->dev_private == NULL) {
> + RTE_LOG(ERR, PMD, "%s() Cannot allocate memzone for 
> private device data",
> + __func__);
> + return -ENOMEM;
> + }
>   }
> 
>   dpaa_dev->crypto_dev = cryptodev;
> --
> 1.8.3.1



[dpdk-dev] [PATCH] examples/ip_pipeline: fix buffer not null terminated

2018-04-19 Thread Kevin Laatz
The destination string may not have a NULL termination if the source string's
length is equal to sizeof(mempool->name).

Using strlcpy in place of strncpy fixes this issue as strlcpy guarantees
NULL termination.

Coverity issue: 272588
Fixes: 6bfe74f8c93e ("examples/ip_pipeline: add mempool object")
Cc: jasvinder.si...@intel.com

Signed-off-by: Kevin Laatz 
---
 examples/ip_pipeline/mempool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/ip_pipeline/mempool.c b/examples/ip_pipeline/mempool.c
index 33b9243..f5d2a7d 100644
--- a/examples/ip_pipeline/mempool.c
+++ b/examples/ip_pipeline/mempool.c
@@ -6,6 +6,7 @@
 #include 
 
 #include 
+#include 
 
 #include "mempool.h"
 
@@ -70,7 +71,7 @@ mempool_create(const char *name, struct mempool_params 
*params)
}
 
/* Node fill in */
-   strncpy(mempool->name, name, sizeof(mempool->name));
+   strlcpy(mempool->name, name, sizeof(mempool->name));
mempool->m = m;
mempool->buffer_size = params->buffer_size;
 
-- 
2.9.5



[dpdk-dev] [PATCH] examples/ip_pipeline: fix buffer not null terminated

2018-04-19 Thread Kevin Laatz
The destination string may not have a NULL termination if the source string's
length is equal to sizeof(tmgr_port->name).

Using strlcpy in place of strncpy fixes this issue as strlcpy guarantees
NULL termination.

Coverity issue: 272592
Fixes: 25961ff3bcb9 ("examples/ip_pipeline: add traffic manager object")
Cc: jasvinder.si...@intel.com

Signed-off-by: Kevin Laatz 
---
 examples/ip_pipeline/tmgr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index b46ca96..40cbf1d 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -4,6 +4,8 @@
 
 #include 
 
+#include 
+
 #include "tmgr.h"
 
 static struct rte_sched_subport_params
@@ -148,7 +150,7 @@ tmgr_port_create(const char *name, struct tmgr_port_params 
*params)
}
 
/* Node fill in */
-   strncpy(tmgr_port->name, name, sizeof(tmgr_port->name));
+   strlcpy(tmgr_port->name, name, sizeof(tmgr_port->name));
tmgr_port->s = s;
tmgr_port->n_subports_per_port = params->n_subports_per_port;
tmgr_port->n_pipes_per_subport = params->n_pipes_per_subport;
-- 
2.9.5



[dpdk-dev] [PATCH] examples/ip_pipeline: fix resource leak

2018-04-19 Thread Kevin Laatz
Closing the fd_server file descriptor on error to fix the resource leak.

Coverity issue: 272587
Fixes: 4bbf8e30aa5e ("examples/ip_pipeline: add CLI interface")
Cc: jasvinder.si...@intel.com

Signed-off-by: Kevin Laatz 
---
 examples/ip_pipeline/conn.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/ip_pipeline/conn.c b/examples/ip_pipeline/conn.c
index 9338942..6b08e9e 100644
--- a/examples/ip_pipeline/conn.c
+++ b/examples/ip_pipeline/conn.c
@@ -96,12 +96,14 @@ conn_init(struct conn_params *p)
sizeof(server_address));
if (status == -1) {
conn_free(conn);
+   close(fd_server);
return NULL;
}
 
status = listen(fd_server, 16);
if (status == -1) {
conn_free(conn);
+   close(fd_server);
return NULL;
}
 
@@ -109,6 +111,7 @@ conn_init(struct conn_params *p)
fd_client_group = epoll_create(1);
if (fd_client_group == -1) {
conn_free(conn);
+   close(fd_server);
return NULL;
}
 
-- 
2.9.5



Re: [dpdk-dev] [dpdk-stable] [PATCH v3 1/7] ethdev: fix port data reset timing

2018-04-19 Thread Ferruh Yigit
On 3/30/2018 11:39 AM, Ferruh Yigit wrote:
> On 3/28/2018 1:07 PM, Matan Azrad wrote:
>> Hi Ferruh
>>
>>> From: Ferruh Yigit, Wednesday, March 28, 2018 1:38 AM
>>> On 3/5/2018 3:12 PM, Matan Azrad wrote:
 Hi Ferruh

 From: Ferruh Yigit, Sent: Monday, March 5, 2018 5:07 PM
> On 3/5/2018 2:52 PM, Matan Azrad wrote:
>> HI
>>
>> From: Ferruh Yigit, Sent: Monday, March 5, 2018 1:24 PM
>>> On 1/18/2018 4:35 PM, Matan Azrad wrote:
 rte_eth_dev_data structure is allocated per ethdev port and can be
 used to get a data of the port internally.

 rte_eth_dev_attach_secondary tries to find the port identifier
 using rte_eth_dev_data name field comparison and may get an
 identifier of invalid port in case of this port was released by
 the primary process because the port release API doesn't reset the
>>> port data.

 So, it will be better to reset the port data in release time
 instead of allocation time.

 Move the port data reset to the port release API.

 Fixes: d948f596fee2 ("ethdev: fix port data mismatched in multiple
 process model")
 Cc: sta...@dpdk.org

 Signed-off-by: Matan Azrad 
 ---
  lib/librte_ether/rte_ethdev.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/lib/librte_ether/rte_ethdev.c
 b/lib/librte_ether/rte_ethdev.c index 7044159..156231c 100644
 --- a/lib/librte_ether/rte_ethdev.c
 +++ b/lib/librte_ether/rte_ethdev.c
 @@ -204,7 +204,6 @@ struct rte_eth_dev *
return NULL;
}

 -  memset(&rte_eth_dev_data[port_id], 0, sizeof(struct
>>> rte_eth_dev_data));
eth_dev = eth_dev_get(port_id);
snprintf(eth_dev->data->name, sizeof(eth_dev->data->name),
>>> "%s", name);
eth_dev->data->port_id = port_id; @@ -252,6 +251,7 @@ struct
 rte_eth_dev *
if (eth_dev == NULL)
return -EINVAL;

 +  memset(eth_dev->data, 0, sizeof(struct
>>> rte_eth_dev_data));
>>>
>>> Hi Matan,
>>>
>>> What most of the vdev release path does is:
>>>
>>> eth_dev = rte_eth_dev_allocated(...)
>>> rte_free(eth_dev->data->dev_private);
>>> rte_free(eth_dev->data);
>>> rte_eth_dev_release_port(eth_dev);
>>>
>>> Since eth_dev->data freed, memset() it in
>>> rte_eth_dev_release_port() will be problem.
>>>
>>> We don't run remove path that is why we didn't hit the issue but
>>> this seems problem for all virtual PMDs.
>>
>> Yes, it is a problem and should be fixed:
>> For vdevs which use private rte_eth_dev_data the remove order can
>>> be:
>>  private_data = eth_dev->data;
>>  rte_free(eth_dev->data->dev_private);
>>  rte_eth_dev_release_port(eth_dev); /* The last operation working
> on ethdev structure. */
>>  rte_free(private_data);
>
> Do we need to save "private_data"?

 Just to emphasis that eth_dev structure should not more be available after
>>> rte_eth_dev_release_port().
 Maybe in the future rte_eth_dev_release_port() will zero eth_dev
 structure too :)
>>>
>>> Hi Matan,
>>>
>>> Reminder of this issue, it would be nice to fix in this release.
>>>
>>
>> Regarding the private rte_eth_dev_data, it should be fixed in the next 
>> thread:
>> https://dpdk.org/dev/patchwork/patch/35632/
>>
>> Regarding the rte_eth_dev_pci_release() function: I'm going to send a fix.
> 
> Thanks Matan for the patch,
> 
> But rte_eth_dev_release_port() is still broken because of this change, please
> check _rte_eth_dev_callback_process() which uses dev->data->port_id.

Hi Matan,

Any update on this?
As mentioned above rte_eth_dev_release_port() is still broken.

Thanks,
ferruh

> 
>>

>>
>>
>>> Also rte_eth_dev_pci_release() looks problematic now.
>>
>> Yes, again, the last operation working on ethdev structure should be
> rte_eth_dev_release_port().
>>
>> So need to fix all vdevs and the rte_eth_dev_pci_release() function.
>>
>> Any comments?
>>

>>
> 



Re: [dpdk-dev] [PATCH v4 1/5] lib/ethdev: support for inline IPsec events

2018-04-19 Thread Thomas Monjalon
11/04/2018 08:40, Anoob Joseph:
> Adding support for IPsec events in rte_eth_event framework. In inline
> IPsec offload, the per packet protocol defined variables, like ESN,
> would be managed by PMD. In such cases, PMD would need IPsec events
> to notify application about various conditions like, ESN overflow.
> 
> Signed-off-by: Anoob Joseph 
> Acked-by: Akhil Goyal 
> ---
> v4:
> * Added more details in documentation
> * Renamed members of struct rte_eth_event_ipsec_desc for better readablity

Good, thank you.

Acked-by: Thomas Monjalon 





Re: [dpdk-dev] [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow

2018-04-19 Thread Nélio Laranjeiro
On Thu, Apr 19, 2018 at 10:21:26AM +, Xueming(Steven) Li wrote:
> 
> 
> > -Original Message-
> > From: Nélio Laranjeiro 
> > Sent: Thursday, April 19, 2018 2:56 PM
> > To: Xueming(Steven) Li 
> > Cc: Shahaf Shuler ; dev@dpdk.org
> > Subject: Re: [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow
> > 
> > On Thu, Apr 19, 2018 at 06:20:50AM +, Xueming(Steven) Li wrote:
> > >
> > >
> > > > -Original Message-
> > > > From: Nélio Laranjeiro 
> > > > Sent: Wednesday, April 18, 2018 11:09 PM
> > > > To: Xueming(Steven) Li 
> > > > Cc: Shahaf Shuler ; dev@dpdk.org
> > > > Subject: Re: [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow
> > > >
> > > > On Wed, Apr 18, 2018 at 02:43:30PM +, Xueming(Steven) Li wrote:
> > > > >
> > > > >
> > > > > > -Original Message-
> > > > > > From: Nélio Laranjeiro 
> > > > > > Sent: Wednesday, April 18, 2018 2:49 PM
> > > > > > To: Xueming(Steven) Li 
> > > > > > Cc: Shahaf Shuler ; dev@dpdk.org
> > > > > > Subject: Re: [PATCH v4 03/11] net/mlx5: support L3 VXLAN flow
> > > > > >
> > > > > > On Tue, Apr 17, 2018 at 11:14:28PM +0800, Xueming Li wrote:
> > > > > > > This patch support L3 VXLAN, no inner L2 header comparing to
> > > > > > > standard VXLAN protocol. L3 VXLAN using specific overlay UDP
> > > > > > > destination port to discriminate against standard VXLAN, FW
> > > > > > > has to be configured to support
> > > > > > > it:
> > > > > > >   sudo mlxconfig -d  -y s IP_OVER_VXLAN_EN=1
> > > > > > >   sudo mlxconfig -d  -y s IP_OVER_VXLAN_PORT=
> > > > > > >
> > > > > > > Signed-off-by: Xueming Li 
> > > > > > > ---
> > > > > > >  drivers/net/mlx5/mlx5_flow.c | 4 +++-
> > > > > > >  1 file changed, 3 insertions(+), 1 deletion(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/mlx5/mlx5_flow.c
> > > > > > > b/drivers/net/mlx5/mlx5_flow.c index 771d5f14d..d7a921dff
> > > > > > > 100644
> > > > > > > --- a/drivers/net/mlx5/mlx5_flow.c
> > > > > > > +++ b/drivers/net/mlx5/mlx5_flow.c
> > > > > > > @@ -413,7 +413,9 @@ static const struct mlx5_flow_items 
> > > > > > > mlx5_flow_items[] = {
> > > > > > >   .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
> > > > > > >   },
> > > > > > >   [RTE_FLOW_ITEM_TYPE_VXLAN] = {
> > > > > > > - .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
> > > > > > > + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
> > > > > > > +RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 
> > > > > > > VXLAN. */
> > > > > > > +RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 
> > > > > > > VXLAN. */
> > > > > > >   .actions = valid_actions,
> > > > > > >   .mask = &(const struct rte_flow_item_vxlan){
> > > > > > >   .vni = "\xff\xff\xff",
> > > > > > > --
> > > > > > > 2.13.3
> > > > > >
> > > > > > Such support must be under device parameter has it depends on
> > > > > > the configuration of the firmware.  If the firmware is not
> > > > > > correctly configured the PMD must refuse
> > > > such rule.
> > > > > >
> > > > > > Thanks,
> > > > > >
> > > > > > --
> > > > > > Nélio Laranjeiro
> > > > > > 6WIND
> > > > >
> > > > > Are you suggesting Verbs parameter? I'm afraid we can't have it in
> > > > > short time, need new patch in later release when Verbs ready.
> > > >
> > > > Take a look at [1], this is what I mean.
> > >
> > > Enabling a new device parameter can't make L3 VXLAN packet get
> > > received if fw configuration not set.
> > 
> > So you expect than the user will enable a feature without reading the PMD 
> > documentation?
> > If it is the case, the answer it pretty simple, it is the same as above, 
> > read the PMD documentation.
> > 
> > > On the other hand, if the fw configuration is enabled and the device
> > > parameter is not set, packets could be received but rule creation would fail.
> > 
> > Again a user using a NIC should read the documentation.
> 
> If a user read the document, fw should be configured correctly to enable this 
> feature.

And a user which does not read this document must not be able to create
rules the NIC cannot handle because the firmware is not configured.

> > > I'm afraid that a device parameter will introduce complexity of using
> > > this feature w/o real benefits.
> > 
> > Add this missing device parameter and update accordingly the documentation, 
> > or wait for Verbs to add
> > the missing query feature.
> > 
> > If the firmware is not configured this rule must be refused; as there is no 
> > way in the PMD to know if
> > the firmware is configured, it must rely on a device parameter.
> 
> Let's keep the design simple; users know exactly what they are doing and 
> should not expect 
> such a flow to work just by reading the documentation.

This is exactly the opposite: users never read documentation — even
today I've already pointed a new user to such documentation [1].

For this same reason a functionality not enabled by default in the
firmware must not be used by the PMD.  No device parameter no feature.

Add the device parameter and the according doc

Re: [dpdk-dev] [PATCH] eventdev: fix icc build

2018-04-19 Thread Thomas Monjalon
> > ICC complains about variable being used before its value is set.
> > Since the variable is only assigned in the for loop, its declaration is 
> > moved
> > inside and is initialized.
> > 
> > lib/librte_eventdev/rte_event_timer_adapter.c(708): error #592:
> > variable "ret" is used before its value is set
> > RTE_SET_USED(ret);
> > 
> > Fixes: 6750b21bd6af ("eventdev: add default software timer adapter")
> > Signed-off-by: Pablo de Lara 
> 
> Acked-by: Erik Gabriel Carrillo 

Applied, thanks





Re: [dpdk-dev] [PATCH] crypto/mrvl: add MRVL PMD to meson

2018-04-19 Thread De Lara Guarch, Pablo
Hi Tomasz,

> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Tomasz Duszynski
> Sent: Monday, April 16, 2018 7:08 AM
> To: dev@dpdk.org
> Cc: Richardson, Bruce ; d...@marvell.com;
> nsams...@marvell.com; Tomasz Duszynski 
> Subject: [dpdk-dev] [PATCH] crypto/mrvl: add MRVL PMD to meson
> 
> Add MRVL CRYPTO PMD to meson build system.
> 
> Signed-off-by: Tomasz Duszynski 

Could you make the same change that you made in the network driver into this 
patch?
Bruce will get this patch, as he has already applied the network driver patch, 
which will be a dependency of this one.

Thanks,
Pablo


Re: [dpdk-dev] vmxnet3 RX VLAN Strip offload incorrect behaviour

2018-04-19 Thread Padam Jeet Singh
Hi,

Does anyone have a work-around for this? Should we debug the vmxnet3 driver 
since for sure this issue is even coming in standard l2fwd example.

Thanks,
Padam

> On 12-Apr-2018, at 11:52 AM, Padam Jeet Singh  
> wrote:
> 
> 
> 
>> On 10-Apr-2018, at 11:43 AM, Yong Wang  wrote:
>> 
>> When using 4095, I assume you enabled trunk mode on ESX vswitch.  What ESX 
>> version did you use? Is the vswitch standard switch or DVS? We have tested 
>> over ESX6.0 and onwards plus DPDK 17.05 and it should work.
> 
> Yes, the 4095 enables trunk mode. ESXi version is "6.0.0 (Build 3620759)”, 
> the switch is a “Standard vSwitch”.
> 
> I did a simple change to the standard l2fwd sample application. Then tried 
> the same app once with e1000e emulation driver, and then with the vmxnet3 
> driver. Patch for changes done to the l2fwd code are available here: 
> https://pastebin.com/0RMJYKr0
> 
> The change I did was enable hw_vlan_strip = 1 and also call 
> rte_eth_dev_set_vlan_offload both post configure and post start.
> 
> With e1000e driver, sample packets arriving with different VLANs:
> 
> mbuff->vlan_tci=500 vlan_tci_outer=0 ethertype=8 
> mbuff->vlan_tci=500 vlan_tci_outer=0 ethertype=8 
> mbuff->vlan_tci=105 vlan_tci_outer=0 ethertype=8 
> mbuff->vlan_tci=100 vlan_tci_outer=0 ethertype=8 
> 
> 
> With vmxnet3, packets arriving with different VLANs:
> 
> mbuff->vlan_tci=0 vlan_tci_outer=0 ethertype=81 
> mbuff->vlan_tci=0 vlan_tci_outer=0 ethertype=81 
> mbuff->vlan_tci=0 vlan_tci_outer=0 ethertype=81 
> mbuff->vlan_tci=0 vlan_tci_outer=0 ethertype=81 
> mbuff->vlan_tci=0 vlan_tci_outer=0 ethertype=81 
> 
> the ether type values are not printed byte-swapped …  so it’s 8100 (VLAN) and 
> 0800 (IPv4).
> 
> 
> 
> 
>> 
>> On 4/8/18, 11:22 PM, "Padam Jeet Singh"  wrote:
>> 
>> 
>>> On 06-Apr-2018, at 11:12 PM, Yong Wang  wrote:
>>> 
>>> Padam,
>>> 
>>> As far as I know, this feature works.  What DPDK version did you use?
>> 
>>   DPDK Version 17.05
>> 
>>> Is there any port reconfiguration (stop/start/mtu change, etc) that could 
>>> lose your vlan offload settings (a dump of the port config at runtime will 
>>> be able to confirm this)?  Can you also post a snippet of packet capture of 
>>> the vlan traffic received?
>>> 
>> 
>>   It’s a standard configure followed by start. No MTU change/Stop. Here are 
>> the port configuration post the device start:
>> 
>>   2018-04-09T05:37:45.332573+00:00 INFO inventum fpnas : APP: -- Ethernet 
>> Port Info (0) --
>>   2018-04-09T05:37:45.332729+00:00 INFO inventum fpnas : APP:   Driver   
>>   net_vmxnet3
>>   2018-04-09T05:37:45.332822+00:00 INFO inventum fpnas : APP:   if_index 
>>   0
>>   2018-04-09T05:37:45.332926+00:00 INFO inventum fpnas : APP:   
>> min_rx_bufsize 1646
>>   2018-04-09T05:37:45.333015+00:00 INFO inventum fpnas : APP:   
>> max_rx_pktlen  16384
>>   2018-04-09T05:37:45.333102+00:00 INFO inventum fpnas : APP:   
>> max_rx_queues supported 16
>>   2018-04-09T05:37:45.333218+00:00 INFO inventum fpnas : APP:   
>> max_tx_queues supported 8
>>   2018-04-09T05:37:45.05+00:00 INFO inventum fpnas : APP:   rx_queues 
>> configured 2
>>   2018-04-09T05:37:45.90+00:00 INFO inventum fpnas : APP:   tx_queues 
>> configured 2
>>   2018-04-09T05:37:45.333475+00:00 INFO inventum fpnas : APP:   
>> max_mac_addrs  1
>>   2018-04-09T05:37:45.333560+00:00 INFO inventum fpnas : APP:   max_vfs  
>>   0
>>   2018-04-09T05:37:45.333644+00:00 INFO inventum fpnas : APP:   
>> max_vmdq_pools 0
>>   2018-04-09T05:37:45.333728+00:00 INFO inventum fpnas : APP:   
>> rx_offload_cap 29
>>   2018-04-09T05:37:45.333832+00:00 INFO inventum fpnas : APP:   
>> tx_offload_cap 45
>>   2018-04-09T05:37:45.333944+00:00 INFO inventum fpnas : APP:   
>> vlan_offload_flags 1
>> 
>> 
>>   And the VLAN offload setting while the device is running (with the packet 
>> coming from the test PC):
>> 
>>   vlan_offload_flag: 1
>> 
>> 
>>   The configuration as is as follows:
>> 
>>   [vmxnet3 based interface on VM with DPDK app] —— [ vswitch with VLAN 4095 
>> and uplink using Intel igb based adaptor] —— [ test PC with VLAN interface ]
>> 
>>   Packet sent by the PC (ARP packet):
>> 
>>   Frame 1: 46 bytes on wire (368 bits), 46 bytes captured (368 bits)
>>   Ethernet II, Src: 00:25:64:cf:f2:30, Dst: ff:ff:ff:ff:ff:ff
>>   802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 105
>>   Address Resolution Protocol (request)
>> 
>> 
>>   If I redo the test with the same physical and vmware setup but instead of 
>> vmxnet3 I use e1000e emulation driver, all of this works fine. Packets come 
>> properly stripped of VLAN with vlan_tci fields set correctly.
>> 
>> 
>>> Yong 
>>> 
>>> On 4/6/18, 6:51 AM, "dev on behalf of Padam Jeet Singh" 
>>>  wrote:
>>> 
>>>  Hi,
>>> 
>>>  When configuring the vmxnet3 based ethernet device, the RX VLAN Strip 
>>> offload does not work as it usually does with other real NICs which support 
>>> this function.
>>> 
>>>  When configuring with rxmode.hw_vlan_strip = 1, the i

[dpdk-dev] [PATCH 02/23] net/sfc/base: make RxQ type data an union

2018-04-19 Thread Andrew Rybchenko
The type is an internal interface. Single integer is insufficient
to carry RxQ type-specific information in the case of equal stride
super-buffer Rx mode (packet buffers per bucket, maximum DMA length,
packet stride, head of line block timeout).

Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/base/ef10_impl.h |  4 +++-
 drivers/net/sfc/base/ef10_rx.c   |  4 ++--
 drivers/net/sfc/base/efx_impl.h  | 13 -
 drivers/net/sfc/base/efx_rx.c| 18 --
 4 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/net/sfc/base/ef10_impl.h b/drivers/net/sfc/base/ef10_impl.h
index b4ad595..36229a7 100644
--- a/drivers/net/sfc/base/ef10_impl.h
+++ b/drivers/net/sfc/base/ef10_impl.h
@@ -967,13 +967,15 @@ externvoid
 ef10_rx_qenable(
__inefx_rxq_t *erp);
 
+union efx_rxq_type_data_u;
+
 extern __checkReturn   efx_rc_t
 ef10_rx_qcreate(
__inefx_nic_t *enp,
__inunsigned int index,
__inunsigned int label,
__inefx_rxq_type_t type,
-   __inuint32_t type_data,
+   __inconst union efx_rxq_type_data_u *type_data,
__inefsys_mem_t *esmp,
__insize_t ndescs,
__inuint32_t id,
diff --git a/drivers/net/sfc/base/ef10_rx.c b/drivers/net/sfc/base/ef10_rx.c
index 70e451f..32cca57 100644
--- a/drivers/net/sfc/base/ef10_rx.c
+++ b/drivers/net/sfc/base/ef10_rx.c
@@ -993,7 +993,7 @@ ef10_rx_qcreate(
__inunsigned int index,
__inunsigned int label,
__inefx_rxq_type_t type,
-   __inuint32_t type_data,
+   __inconst efx_rxq_type_data_t *type_data,
__inefsys_mem_t *esmp,
__insize_t ndescs,
__inuint32_t id,
@@ -1032,7 +1032,7 @@ ef10_rx_qcreate(
break;
 #if EFSYS_OPT_RX_PACKED_STREAM
case EFX_RXQ_TYPE_PACKED_STREAM:
-   switch (type_data) {
+   switch (type_data->ertd_packed_stream.eps_buf_size) {
case EFX_RXQ_PACKED_STREAM_BUF_SIZE_1M:
ps_buf_size = MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M;
break;
diff --git a/drivers/net/sfc/base/efx_impl.h b/drivers/net/sfc/base/efx_impl.h
index b1d4f57..f130713 100644
--- a/drivers/net/sfc/base/efx_impl.h
+++ b/drivers/net/sfc/base/efx_impl.h
@@ -129,6 +129,16 @@ typedef struct efx_tx_ops_s {
 #endif
 } efx_tx_ops_t;
 
+typedef union efx_rxq_type_data_u {
+   /* Dummy member to have non-empty union if no options are enabled */
+   uint32_tertd_dummy;
+#if EFSYS_OPT_RX_PACKED_STREAM
+   struct {
+   uint32_teps_buf_size;
+   } ertd_packed_stream;
+#endif
+} efx_rxq_type_data_t;
+
 typedef struct efx_rx_ops_s {
efx_rc_t(*erxo_init)(efx_nic_t *);
void(*erxo_fini)(efx_nic_t *);
@@ -165,7 +175,8 @@ typedef struct efx_rx_ops_s {
efx_rc_t(*erxo_qflush)(efx_rxq_t *);
void(*erxo_qenable)(efx_rxq_t *);
efx_rc_t(*erxo_qcreate)(efx_nic_t *enp, unsigned int,
-   unsigned int, efx_rxq_type_t, uint32_t,
+   unsigned int, efx_rxq_type_t,
+   const efx_rxq_type_data_t *,
efsys_mem_t *, size_t, uint32_t,
unsigned int,
efx_evq_t *, efx_rxq_t *);
diff --git a/drivers/net/sfc/base/efx_rx.c b/drivers/net/sfc/base/efx_rx.c
index d75957f..5f49b3a 100644
--- a/drivers/net/sfc/base/efx_rx.c
+++ b/drivers/net/sfc/base/efx_rx.c
@@ -107,7 +107,7 @@ siena_rx_qcreate(
__inunsigned int index,
__inunsigned int label,
__inefx_rxq_type_t type,
-   __inuint32_t type_data,
+   __inconst efx_rxq_type_data_t *type_data,
__inefsys_mem_t *esmp,
__insize_t ndescs,
__inuint32_t id,
@@ -745,7 +745,7 @@ efx_rx_qcreate_internal(
__inunsigned int index,
__inunsigned int label,
__inefx_rxq_type_t type,
-   __inuint32_t type_data,
+   __inconst efx_rxq_type_data_t *type_data,
__inefsys_mem_t *esmp,
__insize_t ndescs,
__inuint32_t id,
@@ -806,8 +806,8 @@ efx_rx_qcreate(
__inefx_evq_t *eep,
__deref_out efx_rxq_t **erpp)
 {
-   return efx_rx_qcreate_internal(enp, index, label, type, 0, esmp, ndescs,
-   id, flags, eep, erpp);
+   return efx_rx_qcreate_internal(enp, index, label, type, NULL,
+   esmp, ndescs, id, flags, eep, erpp);
 }
 
 #if EFSYS_OPT_RX_PACKE

[dpdk-dev] [PATCH 03/23] net/sfc/base: detect equal stride super-buffer support

2018-04-19 Thread Andrew Rybchenko
Equal stride super-buffer Rx mode is supported on Medford2 by
DPDK firmware variant.

Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/base/ef10_nic.c  | 6 ++
 drivers/net/sfc/base/efx.h   | 1 +
 drivers/net/sfc/base/siena_nic.c | 1 +
 3 files changed, 8 insertions(+)

diff --git a/drivers/net/sfc/base/ef10_nic.c b/drivers/net/sfc/base/ef10_nic.c
index e1f1c2e..35b719a 100644
--- a/drivers/net/sfc/base/ef10_nic.c
+++ b/drivers/net/sfc/base/ef10_nic.c
@@ -1114,6 +1114,12 @@ ef10_get_datapath_caps(
else
encp->enc_rx_var_packed_stream_supported = B_FALSE;
 
+   /* Check if the firmware supports equal stride super-buffer mode */
+   if (CAP_FLAGS2(req, EQUAL_STRIDE_SUPER_BUFFER))
+   encp->enc_rx_es_super_buffer_supported = B_TRUE;
+   else
+   encp->enc_rx_es_super_buffer_supported = B_FALSE;
+
/* Check if the firmware supports FW subvariant w/o Tx checksumming */
if (CAP_FLAGS2(req, FW_SUBVARIANT_NO_TX_CSUM))
encp->enc_fw_subvariant_no_tx_csum_supported = B_TRUE;
diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h
index 0b75f0f..dea8d60 100644
--- a/drivers/net/sfc/base/efx.h
+++ b/drivers/net/sfc/base/efx.h
@@ -1270,6 +1270,7 @@ typedef struct efx_nic_cfg_s {
boolean_t   enc_init_evq_v2_supported;
boolean_t   enc_rx_packed_stream_supported;
boolean_t   enc_rx_var_packed_stream_supported;
+   boolean_t   enc_rx_es_super_buffer_supported;
boolean_t   enc_fw_subvariant_no_tx_csum_supported;
boolean_t   enc_pm_and_rxdp_counters;
boolean_t   enc_mac_stats_40g_tx_size_bins;
diff --git a/drivers/net/sfc/base/siena_nic.c b/drivers/net/sfc/base/siena_nic.c
index c3a9495..15aa06b 100644
--- a/drivers/net/sfc/base/siena_nic.c
+++ b/drivers/net/sfc/base/siena_nic.c
@@ -161,6 +161,7 @@ siena_board_cfg(
encp->enc_allow_set_mac_with_installed_filters = B_TRUE;
encp->enc_rx_packed_stream_supported = B_FALSE;
encp->enc_rx_var_packed_stream_supported = B_FALSE;
+   encp->enc_rx_es_super_buffer_supported = B_FALSE;
encp->enc_fw_subvariant_no_tx_csum_supported = B_FALSE;
 
/* Siena supports two 10G ports, and 8 lanes of PCIe Gen2 */
-- 
2.7.4



[dpdk-dev] [PATCH 09/23] net/sfc: conditionally compile support for tunnel packets

2018-04-19 Thread Andrew Rybchenko
Equal stride super-buffer Rx datapath does not support tunnels, code to
parse tunnel packet types and inner checksum offload is not required and
it is important to be able to compile it out on build time to avoid
extra CPU load.

Cutting out tunnel support relies on compiler optimizations to
be able to drop extra checks and branches if tun_ptype is always 0.

Signed-off-by: Andrew Rybchenko 
Reviewed-by: Ivan Malov 
---
 drivers/net/sfc/sfc_ef10_rx.c| 2 ++
 drivers/net/sfc/sfc_ef10_rx_ev.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/sfc/sfc_ef10_rx.c b/drivers/net/sfc/sfc_ef10_rx.c
index 7560891..5ec82db 100644
--- a/drivers/net/sfc/sfc_ef10_rx.c
+++ b/drivers/net/sfc/sfc_ef10_rx.c
@@ -25,6 +25,8 @@
 #include "sfc_dp_rx.h"
 #include "sfc_kvargs.h"
 #include "sfc_ef10.h"
+
+#define SFC_EF10_RX_EV_ENCAP_SUPPORT   1
 #include "sfc_ef10_rx_ev.h"
 
 #define sfc_ef10_rx_err(dpq, ...) \
diff --git a/drivers/net/sfc/sfc_ef10_rx_ev.h b/drivers/net/sfc/sfc_ef10_rx_ev.h
index 774a789..9054fb9 100644
--- a/drivers/net/sfc/sfc_ef10_rx_ev.h
+++ b/drivers/net/sfc/sfc_ef10_rx_ev.h
@@ -37,6 +37,7 @@ sfc_ef10_rx_ev_to_offloads(const efx_qword_t rx_ev, struct 
rte_mbuf *m,
if (unlikely(EFX_TEST_QWORD_BIT(rx_ev, ESF_DZ_RX_PARSE_INCOMPLETE_LBN)))
goto done;
 
+#if SFC_EF10_RX_EV_ENCAP_SUPPORT
switch (EFX_QWORD_FIELD(rx_ev, ESF_EZ_RX_ENCAP_HDR)) {
default:
/* Unexpected encapsulation tag class */
@@ -58,6 +59,7 @@ sfc_ef10_rx_ev_to_offloads(const efx_qword_t rx_ev, struct 
rte_mbuf *m,
tun_ptype = RTE_PTYPE_TUNNEL_NVGRE;
break;
}
+#endif
 
if (tun_ptype == 0) {
ip_csum_err_bit = ESF_DZ_RX_IPCKSUM_ERR_LBN;
-- 
2.7.4



[dpdk-dev] [PATCH 01/23] net/sfc/base: update autogenerated MCDI and TLV headers

2018-04-19 Thread Andrew Rybchenko
Equal stride super-buffer is a new name instead of deprecated equal
stride packed stream to avoid confusion with previous packed stream.

Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/base/ef10_tlv_layout.h |  22 ++
 drivers/net/sfc/base/efx_regs_mcdi.h   | 646 -
 2 files changed, 654 insertions(+), 14 deletions(-)

diff --git a/drivers/net/sfc/base/ef10_tlv_layout.h 
b/drivers/net/sfc/base/ef10_tlv_layout.h
index b19dc2a..56cffae 100644
--- a/drivers/net/sfc/base/ef10_tlv_layout.h
+++ b/drivers/net/sfc/base/ef10_tlv_layout.h
@@ -4,6 +4,14 @@
  * All rights reserved.
  */
 
+/*
+ * This is NOT the original source file. Do NOT edit it.
+ * To update the tlv layout, please edit the copy in
+ * the sfregistry repo and then, in that repo,
+ * "make tlv_headers" or "make export" to
+ * regenerate and export all types of headers.
+ */
+
 /* These structures define the layouts for the TLV items stored in static and
  * dynamic configuration partitions in NVRAM for EF10 (Huntington etc.).
  *
@@ -409,6 +417,7 @@ struct tlv_firmware_options {
  
MC_CMD_FW_PACKED_STREAM_HASH_MODE_1
 #define TLV_FIRMWARE_VARIANT_RULES_ENGINEMC_CMD_FW_RULES_ENGINE
 #define TLV_FIRMWARE_VARIANT_DPDKMC_CMD_FW_DPDK
+#define TLV_FIRMWARE_VARIANT_L3XUDP  MC_CMD_FW_L3XUDP
 };
 
 /* Voltage settings
@@ -986,4 +995,17 @@ struct tlv_fastpd_mode {
 #define TLV_FASTPD_MODE_FAST_SUPPORTED 2  /* Supported packet types to the 
FastPD; everything else to the SoftPD  */
 };
 
+/* L3xUDP datapath firmware UDP port configuration
+ *
+ * Sets the list of UDP ports on which the encapsulation will be handled.
+ * The number of ports in the list is implied by the length of the TLV item.
+ */
+#define TLV_TAG_L3XUDP_PORTS(0x102a)
+struct tlv_l3xudp_ports {
+  uint32_t tag;
+  uint32_t length;
+  uint16_t ports[];
+#define TLV_TAG_L3XUDP_PORTS_MAX_NUM_PORTS 16
+};
+
 #endif /* CI_MGMT_TLV_LAYOUT_H */
diff --git a/drivers/net/sfc/base/efx_regs_mcdi.h 
b/drivers/net/sfc/base/efx_regs_mcdi.h
index c939fdd..cf8a793 100644
--- a/drivers/net/sfc/base/efx_regs_mcdi.h
+++ b/drivers/net/sfc/base/efx_regs_mcdi.h
@@ -2740,6 +2740,8 @@
 #defineMC_CMD_DRV_ATTACH_IN_PREBOOT_WIDTH 1
 #defineMC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_LBN 2
 #defineMC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_WIDTH 1
+#defineMC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_LBN 3
+#defineMC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_WIDTH 1
 /* 1 to set new state, or 0 to just report the existing state */
 #defineMC_CMD_DRV_ATTACH_IN_UPDATE_OFST 4
 #defineMC_CMD_DRV_ATTACH_IN_UPDATE_LEN 4
@@ -2768,6 +2770,12 @@
  * bug69716)
  */
 #defineMC_CMD_FW_L3XUDP 0x7
+/* enum: Requests that the MC keep whatever datapath firmware is currently
+ * running. It's used for test purposes, where we want to be able to shmboot
+ * special test firmware variants. This option is only recognised in eftest
+ * (i.e. non-production) builds.
+ */
+#defineMC_CMD_FW_KEEP_CURRENT_EFTEST_ONLY 0xfffe
 /* enum: Only this option is allowed for non-admin functions */
 #defineMC_CMD_FW_DONT_CARE 0x
 
@@ -2797,6 +2805,11 @@
  * refers to the Sorrento external FPGA port.
  */
 #defineMC_CMD_DRV_ATTACH_EXT_OUT_FLAG_NO_ACTIVE_PORT 0x3
+/* enum: If set, indicates that VI spreading is currently enabled. Will always
+ * indicate the current state, regardless of the value in the WANT_VI_SPREADING
+ * input.
+ */
+#defineMC_CMD_DRV_ATTACH_EXT_OUT_FLAG_VI_SPREADING_ENABLED 0x4
 
 
 /***/
@@ -3600,6 +3613,37 @@
 /*Enum values, see field(s): */
 /*   100M */
 
+/* AN_TYPE structuredef: Auto-negotiation types defined in IEEE802.3 */
+#defineAN_TYPE_LEN 4
+#defineAN_TYPE_TYPE_OFST 0
+#defineAN_TYPE_TYPE_LEN 4
+/* enum: None, AN disabled or not supported */
+#defineMC_CMD_AN_NONE 0x0
+/* enum: Clause 28 - BASE-T */
+#defineMC_CMD_AN_CLAUSE28 0x1
+/* enum: Clause 37 - BASE-X */
+#defineMC_CMD_AN_CLAUSE37 0x2
+/* enum: Clause 73 - BASE-R startup protocol for backplane and copper cable
+ * assemblies. Includes Clause 72/Clause 92 link-training.
+ */
+#defineMC_CMD_AN_CLAUSE73 0x3
+#defineAN_TYPE_TYPE_LBN 0
+#defineAN_TYPE_TYPE_WIDTH 32
+
+/* FEC_TYPE structuredef: Forward error correction types defined in IEEE802.3
+ */
+#defineFEC_TYPE_LEN 4
+#defineFEC_TYPE_TYPE_OFST 0
+#defineFEC_TYPE_TYPE_LEN 4
+/* enum: No FEC */
+#defineMC_CMD_FEC_NONE 0x0
+/* enum: Clause 74 BASE-R FEC (a.k.a Firecode) */
+#defineMC_CMD_FEC_BASER 0x1
+/* enum: Clause 91/Clause 108 Reed-Solomon FEC */
+#defineMC_CMD_FEC_RS 0x2
+#defineFEC_TYPE_TYPE_LBN 0
+#defineFEC_TYPE_TYPE_WIDTH 32
+
 
 /***/
 /* MC_CMD_GET_LINK
@@ -3616,10 +3660,14 @@
 
 /* MC_CMD

[dpdk-dev] [PATCH 21/23] net/sfc: make processing of flow rule actions more uniform

2018-04-19 Thread Andrew Rybchenko
From: Roman Zhukov 

Prepare function that parse flow rule actions to support not
fate-deciding actions.

Signed-off-by: Roman Zhukov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/sfc_flow.c | 57 ++
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c
index 55226f1..bec29ae 100644
--- a/drivers/net/sfc/sfc_flow.c
+++ b/drivers/net/sfc/sfc_flow.c
@@ -1498,7 +1498,10 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
   struct rte_flow_error *error)
 {
int rc;
-   boolean_t is_specified = B_FALSE;
+   uint32_t actions_set = 0;
+   const uint32_t fate_actions_mask = (1UL << RTE_FLOW_ACTION_TYPE_QUEUE) |
+  (1UL << RTE_FLOW_ACTION_TYPE_RSS) |
+  (1UL << RTE_FLOW_ACTION_TYPE_DROP);
 
if (actions == NULL) {
rte_flow_error_set(error, EINVAL,
@@ -1507,21 +1510,22 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
return -rte_errno;
}
 
+#define SFC_BUILD_SET_OVERFLOW(_action, _set) \
+   RTE_BUILD_BUG_ON(_action >= sizeof(_set) * CHAR_BIT)
+
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-   /* This one may appear anywhere multiple times. */
-   if (actions->type == RTE_FLOW_ACTION_TYPE_VOID)
-   continue;
-   /* Fate-deciding actions may appear exactly once. */
-   if (is_specified) {
-   rte_flow_error_set
-   (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-actions,
-"Cannot combine several fate-deciding actions,"
-"choose between QUEUE, RSS or DROP");
-   return -rte_errno;
-   }
switch (actions->type) {
+   case RTE_FLOW_ACTION_TYPE_VOID:
+   SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_VOID,
+  actions_set);
+   break;
+
case RTE_FLOW_ACTION_TYPE_QUEUE:
+   SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_QUEUE,
+  actions_set);
+   if ((actions_set & fate_actions_mask) != 0)
+   goto fail_fate_actions;
+
rc = sfc_flow_parse_queue(sa, actions->conf, flow);
if (rc != 0) {
rte_flow_error_set(error, EINVAL,
@@ -1529,11 +1533,14 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
"Bad QUEUE action");
return -rte_errno;
}
-
-   is_specified = B_TRUE;
break;
 
case RTE_FLOW_ACTION_TYPE_RSS:
+   SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_RSS,
+  actions_set);
+   if ((actions_set & fate_actions_mask) != 0)
+   goto fail_fate_actions;
+
rc = sfc_flow_parse_rss(sa, actions->conf, flow);
if (rc != 0) {
rte_flow_error_set(error, rc,
@@ -1541,15 +1548,16 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
"Bad RSS action");
return -rte_errno;
}
-
-   is_specified = B_TRUE;
break;
 
case RTE_FLOW_ACTION_TYPE_DROP:
+   SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_DROP,
+  actions_set);
+   if ((actions_set & fate_actions_mask) != 0)
+   goto fail_fate_actions;
+
flow->spec.template.efs_dmaq_id =
EFX_FILTER_SPEC_RX_DMAQ_ID_DROP;
-
-   is_specified = B_TRUE;
break;
 
default:
@@ -1558,15 +1566,24 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
   "Action is not supported");
return -rte_errno;
}
+
+   actions_set |= (1UL << actions->type);
}
+#undef SFC_BUILD_SET_OVERFLOW
 
/* When fate is unknown, drop traffic. */
-   if (!is_specified) {
+   if ((actions_set & fate_actions_mask) == 0) {
flow->spec.template.efs_dmaq_id =
EFX_FILTER_SPEC_RX_DMAQ_ID_DROP;
}
 
return 0;
+
+fail_fate_actions:
+   rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, actions,
+  

[dpdk-dev] [PATCH 18/23] net/sfc/base: get actions MARK and FLAG support

2018-04-19 Thread Andrew Rybchenko
From: Roman Zhukov 

Filter actions MARK and FLAG are supported on Medford2 by DPDK
firmware variant.

Signed-off-by: Roman Zhukov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/base/ef10_nic.c  | 10 ++
 drivers/net/sfc/base/efx.h   |  3 +++
 drivers/net/sfc/base/siena_nic.c |  3 +++
 3 files changed, 16 insertions(+)

diff --git a/drivers/net/sfc/base/ef10_nic.c b/drivers/net/sfc/base/ef10_nic.c
index 35b719a..b28226d 100644
--- a/drivers/net/sfc/base/ef10_nic.c
+++ b/drivers/net/sfc/base/ef10_nic.c
@@ -1294,6 +1294,16 @@ ef10_get_datapath_caps(
 */
encp->enc_rx_scale_l4_hash_supported = B_TRUE;
}
+   /* Check if the firmware supports "FLAG" and "MARK" filter actions */
+   if (CAP_FLAGS2(req, FILTER_ACTION_FLAG))
+   encp->enc_filter_action_flag_supported = B_TRUE;
+   else
+   encp->enc_filter_action_flag_supported = B_FALSE;
+
+   if (CAP_FLAGS2(req, FILTER_ACTION_MARK))
+   encp->enc_filter_action_mark_supported = B_TRUE;
+   else
+   encp->enc_filter_action_mark_supported = B_FALSE;
 
 #undef CAP_FLAGS1
 #undef CAP_FLAGS2
diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h
index b334cc5..cd0e6f8 100644
--- a/drivers/net/sfc/base/efx.h
+++ b/drivers/net/sfc/base/efx.h
@@ -1293,6 +1293,9 @@ typedef struct efx_nic_cfg_s {
/* Firmware support for extended MAC_STATS buffer */
uint32_tenc_mac_stats_nstats;
boolean_t   enc_fec_counters;
+   /* Firmware support for "FLAG" and "MARK" filter actions */
+   boolean_t   enc_filter_action_flag_supported;
+   boolean_t   enc_filter_action_mark_supported;
 } efx_nic_cfg_t;
 
 #defineEFX_PCI_FUNCTION_IS_PF(_encp)   ((_encp)->enc_vf == 0x)
diff --git a/drivers/net/sfc/base/siena_nic.c b/drivers/net/sfc/base/siena_nic.c
index 15aa06b..b703369 100644
--- a/drivers/net/sfc/base/siena_nic.c
+++ b/drivers/net/sfc/base/siena_nic.c
@@ -172,6 +172,9 @@ siena_board_cfg(
 
encp->enc_mac_stats_nstats = MC_CMD_MAC_NSTATS;
 
+   encp->enc_filter_action_flag_supported = B_FALSE;
+   encp->enc_filter_action_mark_supported = B_FALSE;
+
return (0);
 
 fail2:
-- 
2.7.4



[dpdk-dev] [PATCH 12/23] net/sfc: support equal stride super-buffer Rx mode

2018-04-19 Thread Andrew Rybchenko
HW Rx descriptor represents many contiguous packet buffers which
follow each other. Number of buffers, stride and maximum DMA
length are setup-time configurable per Rx queue based on provided
mempool. The mempool must support contiguous block allocation and
get info API to retrieve number of objects in the block.

Signed-off-by: Andrew Rybchenko 
Reviewed-by: Ivan Malov 
---
 doc/guides/nics/sfc_efx.rst|  20 +-
 drivers/net/sfc/Makefile   |   1 +
 drivers/net/sfc/efsys.h|   2 +-
 drivers/net/sfc/meson.build|   1 +
 drivers/net/sfc/sfc_dp.h   |   3 +-
 drivers/net/sfc/sfc_dp_rx.h|   8 +
 drivers/net/sfc/sfc_ef10.h |   3 +
 drivers/net/sfc/sfc_ef10_essb_rx.c | 643 +
 drivers/net/sfc/sfc_ef10_rx.c  |   2 +-
 drivers/net/sfc/sfc_ef10_rx_ev.h   |   5 +-
 drivers/net/sfc/sfc_ethdev.c   |   6 +
 drivers/net/sfc/sfc_ev.c   |  34 ++
 drivers/net/sfc/sfc_kvargs.h   |   4 +-
 drivers/net/sfc/sfc_rx.c   |  45 ++-
 drivers/net/sfc/sfc_rx.h   |   1 +
 15 files changed, 767 insertions(+), 11 deletions(-)
 create mode 100644 drivers/net/sfc/sfc_ef10_essb_rx.c

diff --git a/doc/guides/nics/sfc_efx.rst b/doc/guides/nics/sfc_efx.rst
index abaed67..bbc6e61 100644
--- a/doc/guides/nics/sfc_efx.rst
+++ b/doc/guides/nics/sfc_efx.rst
@@ -121,6 +121,21 @@ required in the receive buffer.
 It should be taken into account when mbuf pool for receive is created.
 
 
+Equal stride super-buffer mode
+~~
+
+When the receive queue uses equal stride super-buffer DMA mode, one HW Rx
+descriptor carries many Rx buffers which contiguously follow each other
+with some stride (equal to total size of rte_mbuf as mempool object).
+Each Rx buffer is an independent rte_mbuf.
+However, a dedicated mempool manager must be used when the mempool for the Rx
+queue is created. The manager must support dequeue of the contiguous
+block of objects and provide mempool info API to get the block size.
+
+Another limitation of an equal stride super-buffer mode, imposed by the
+firmware, is that it allows for a single RSS context.
+
+
 Tunnels support
 ---
 
@@ -291,7 +306,7 @@ whitelist option like "-w 02:00.0,arg1=value1,...".
 Case-insensitive 1/y/yes/on or 0/n/no/off may be used to specify
 boolean parameters value.
 
-- ``rx_datapath`` [auto|efx|ef10] (default **auto**)
+- ``rx_datapath`` [auto|efx|ef10|ef10_esps] (default **auto**)
 
   Choose receive datapath implementation.
   **auto** allows the driver itself to make a choice based on firmware
@@ -300,6 +315,9 @@ boolean parameters value.
   **ef10** chooses EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which is
   more efficient than libefx-based and provides richer packet type
   classification, but lacks Rx scatter support.
+  **ef10_esps** chooses SFNX2xxx equal stride packed stream datapath
+  which may be used on DPDK firmware variant only
+  (see notes about its limitations above).
 
 - ``tx_datapath`` [auto|efx|ef10|ef10_simple] (default **auto**)
 
diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile
index f3e0b4b..3bb41a0 100644
--- a/drivers/net/sfc/Makefile
+++ b/drivers/net/sfc/Makefile
@@ -81,6 +81,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_filter.c
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_dp.c
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_ef10_rx.c
+SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_ef10_essb_rx.c
 SRCS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc_ef10_tx.c
 
 VPATH += $(SRCDIR)/base
diff --git a/drivers/net/sfc/efsys.h b/drivers/net/sfc/efsys.h
index f71581c..b9d2df5 100644
--- a/drivers/net/sfc/efsys.h
+++ b/drivers/net/sfc/efsys.h
@@ -198,7 +198,7 @@ prefetch_read_once(const volatile void *addr)
 
 #define EFSYS_OPT_RX_PACKED_STREAM 0
 
-#define EFSYS_OPT_RX_ES_SUPER_BUFFER 0
+#define EFSYS_OPT_RX_ES_SUPER_BUFFER 1
 
 #define EFSYS_OPT_TUNNEL 1
 
diff --git a/drivers/net/sfc/meson.build b/drivers/net/sfc/meson.build
index 0de2e17..3aa14c7 100644
--- a/drivers/net/sfc/meson.build
+++ b/drivers/net/sfc/meson.build
@@ -54,6 +54,7 @@ sources = files(
'sfc_flow.c',
'sfc_dp.c',
'sfc_ef10_rx.c',
+   'sfc_ef10_essb_rx.c',
'sfc_ef10_tx.c'
 )
 
diff --git a/drivers/net/sfc/sfc_dp.h b/drivers/net/sfc/sfc_dp.h
index 26e7195..3da65ab 100644
--- a/drivers/net/sfc/sfc_dp.h
+++ b/drivers/net/sfc/sfc_dp.h
@@ -79,7 +79,8 @@ struct sfc_dp {
enum sfc_dp_typetype;
/* Mask of required hardware/firmware capabilities */
unsigned inthw_fw_caps;
-#define SFC_DP_HW_FW_CAP_EF10  0x1
+#define SFC_DP_HW_FW_CAP_EF10  0x1
+#define SFC_DP_HW_FW_CAP_RX_ES_SUPER_BUFFER0x2
 };
 
 /** List of datapath variants */
diff --git a/drivers/net/sfc/sfc_dp_rx.h b/drivers/net/sfc/sfc_dp_rx.h
index ecb486f..db075dd 100644
--- a/drivers/net/sfc/sfc_dp_r

[dpdk-dev] [PATCH 22/23] net/sfc: support MARK and FLAG actions in flow API

2018-04-19 Thread Andrew Rybchenko
From: Roman Zhukov 

Signed-off-by: Roman Zhukov 
Signed-off-by: Andrew Rybchenko 
---
 doc/guides/nics/sfc_efx.rst |  4 +++
 drivers/net/sfc/sfc_flow.c  | 64 +
 2 files changed, 68 insertions(+)

diff --git a/doc/guides/nics/sfc_efx.rst b/doc/guides/nics/sfc_efx.rst
index c9354e3..bbf698e 100644
--- a/doc/guides/nics/sfc_efx.rst
+++ b/doc/guides/nics/sfc_efx.rst
@@ -201,6 +201,10 @@ Supported actions:
 
 - DROP
 
+- FLAG (supported only with ef10_essb Rx datapath)
+
+- MARK (supported only with ef10_essb Rx datapath)
+
 Validating flow rules depends on the firmware variant.
 
Ethernet destination individual/group match
diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c
index bec29ae..afd688d 100644
--- a/drivers/net/sfc/sfc_flow.c
+++ b/drivers/net/sfc/sfc_flow.c
@@ -23,6 +23,7 @@
 #include "sfc_filter.h"
 #include "sfc_flow.h"
 #include "sfc_log.h"
+#include "sfc_dp_rx.h"
 
 /*
  * At now flow API is implemented in such a manner that each
@@ -1492,16 +1493,35 @@ sfc_flow_filter_remove(struct sfc_adapter *sa,
 }
 
 static int
+sfc_flow_parse_mark(struct sfc_adapter *sa,
+   const struct rte_flow_action_mark *mark,
+   struct rte_flow *flow)
+{
+   const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
+
+   if (mark == NULL || mark->id > encp->enc_filter_action_mark_max)
+   return EINVAL;
+
+   flow->spec.template.efs_flags |= EFX_FILTER_FLAG_ACTION_MARK;
+   flow->spec.template.efs_mark = mark->id;
+
+   return 0;
+}
+
+static int
 sfc_flow_parse_actions(struct sfc_adapter *sa,
   const struct rte_flow_action actions[],
   struct rte_flow *flow,
   struct rte_flow_error *error)
 {
int rc;
+   const unsigned int dp_rx_features = sa->dp_rx->features;
uint32_t actions_set = 0;
const uint32_t fate_actions_mask = (1UL << RTE_FLOW_ACTION_TYPE_QUEUE) |
   (1UL << RTE_FLOW_ACTION_TYPE_RSS) |
   (1UL << RTE_FLOW_ACTION_TYPE_DROP);
+   const uint32_t mark_actions_mask = (1UL << RTE_FLOW_ACTION_TYPE_MARK) |
+  (1UL << RTE_FLOW_ACTION_TYPE_FLAG);
 
if (actions == NULL) {
rte_flow_error_set(error, EINVAL,
@@ -1560,6 +1580,45 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
EFX_FILTER_SPEC_RX_DMAQ_ID_DROP;
break;
 
+   case RTE_FLOW_ACTION_TYPE_FLAG:
+   SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_FLAG,
+  actions_set);
+   if ((actions_set & mark_actions_mask) != 0)
+   goto fail_actions_overlap;
+
+   if ((dp_rx_features & SFC_DP_RX_FEAT_FLOW_FLAG) == 0) {
+   rte_flow_error_set(error, ENOTSUP,
+   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+   "FLAG action is not supported on the 
current Rx datapath");
+   return -rte_errno;
+   }
+
+   flow->spec.template.efs_flags |=
+   EFX_FILTER_FLAG_ACTION_FLAG;
+   break;
+
+   case RTE_FLOW_ACTION_TYPE_MARK:
+   SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_MARK,
+  actions_set);
+   if ((actions_set & mark_actions_mask) != 0)
+   goto fail_actions_overlap;
+
+   if ((dp_rx_features & SFC_DP_RX_FEAT_FLOW_MARK) == 0) {
+   rte_flow_error_set(error, ENOTSUP,
+   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+   "MARK action is not supported on the 
current Rx datapath");
+   return -rte_errno;
+   }
+
+   rc = sfc_flow_parse_mark(sa, actions->conf, flow);
+   if (rc != 0) {
+   rte_flow_error_set(error, rc,
+   RTE_FLOW_ERROR_TYPE_ACTION, actions,
+   "Bad MARK action");
+   return -rte_errno;
+   }
+   break;
+
default:
rte_flow_error_set(error, ENOTSUP,
   RTE_FLOW_ERROR_TYPE_ACTION, actions,
@@ -1584,6 +1643,11 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
   "Cannot combine several fate-deciding actions, "
   "choose between QUEUE, RSS or DROP");
return -rte_errno;
+
+fail_actions_overlap:
+   rte_

[dpdk-dev] [PATCH 23/23] doc: advertise equal stride super-buffer Rx mode support in net/sfc

2018-04-19 Thread Andrew Rybchenko
Signed-off-by: Andrew Rybchenko 
---
 doc/guides/rel_notes/release_18_05.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/guides/rel_notes/release_18_05.rst 
b/doc/guides/rel_notes/release_18_05.rst
index b8f526b..e99c2a6 100644
--- a/doc/guides/rel_notes/release_18_05.rst
+++ b/doc/guides/rel_notes/release_18_05.rst
@@ -62,6 +62,8 @@ New Features
   * Added support for Solarflare XtremeScale X2xxx family adapters.
   * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
   * Added support for DROP action in flow API.
+  * Added support for equal stride super-buffer Rx mode (X2xxx only).
+  * Added support for MARK and FLAG actions in flow API (X2xxx only).
 
 * **Added Ethernet poll mode driver for AMD XGBE devices.**
 
-- 
2.7.4



[dpdk-dev] [PATCH 06/23] net/sfc: factor out function to push Rx doorbell

2018-04-19 Thread Andrew Rybchenko
The function may be shared by different Rx datapath implementations.

Signed-off-by: Andrew Rybchenko 
Reviewed-by: Ivan Malov 
Reviewed-by: Andy Moreton 
---
 drivers/net/sfc/sfc_ef10.h| 31 +++
 drivers/net/sfc/sfc_ef10_rx.c | 33 +++--
 2 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/net/sfc/sfc_ef10.h b/drivers/net/sfc/sfc_ef10.h
index ace6a1d..865359f 100644
--- a/drivers/net/sfc/sfc_ef10.h
+++ b/drivers/net/sfc/sfc_ef10.h
@@ -79,6 +79,37 @@ sfc_ef10_ev_present(const efx_qword_t ev)
   ~EFX_QWORD_FIELD(ev, EFX_DWORD_1);
 }
 
+
+/**
+ * Alignment requirement for value written to RX WPTR:
+ * the WPTR must be aligned to an 8 descriptor boundary.
+ */
+#define SFC_EF10_RX_WPTR_ALIGN 8u
+
+static inline void
+sfc_ef10_rx_qpush(volatile void *doorbell, unsigned int added,
+ unsigned int ptr_mask)
+{
+   efx_dword_t dword;
+
+   /* Hardware has alignment restriction for WPTR */
+   RTE_BUILD_BUG_ON(SFC_RX_REFILL_BULK % SFC_EF10_RX_WPTR_ALIGN != 0);
+   SFC_ASSERT(RTE_ALIGN(added, SFC_EF10_RX_WPTR_ALIGN) == added);
+
+   EFX_POPULATE_DWORD_1(dword, ERF_DZ_RX_DESC_WPTR, added & ptr_mask);
+
+   /* DMA sync to device is not required */
+
+   /*
+* rte_write32() has rte_io_wmb() which guarantees that the STORE
+* operations (i.e. Rx and event descriptor updates) that precede
+* the rte_io_wmb() call are visible to NIC before the STORE
+* operations that follow it (i.e. doorbell write).
+*/
+   rte_write32(dword.ed_u32[0], doorbell);
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/drivers/net/sfc/sfc_ef10_rx.c b/drivers/net/sfc/sfc_ef10_rx.c
index 7d6b64e..92e1ef0 100644
--- a/drivers/net/sfc/sfc_ef10_rx.c
+++ b/drivers/net/sfc/sfc_ef10_rx.c
@@ -30,12 +30,6 @@
SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__)
 
 /**
- * Alignment requirement for value written to RX WPTR:
- * the WPTR must be aligned to an 8 descriptor boundary.
- */
-#define SFC_EF10_RX_WPTR_ALIGN 8
-
-/**
  * Maximum number of descriptors/buffers in the Rx ring.
  * It should guarantee that corresponding event queue never overfill.
  * EF10 native datapath uses event queue of the same size as Rx queue.
@@ -88,29 +82,6 @@ sfc_ef10_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
 }
 
 static void
-sfc_ef10_rx_qpush(struct sfc_ef10_rxq *rxq)
-{
-   efx_dword_t dword;
-
-   /* Hardware has alignment restriction for WPTR */
-   RTE_BUILD_BUG_ON(SFC_RX_REFILL_BULK % SFC_EF10_RX_WPTR_ALIGN != 0);
-   SFC_ASSERT(RTE_ALIGN(rxq->added, SFC_EF10_RX_WPTR_ALIGN) == rxq->added);
-
-   EFX_POPULATE_DWORD_1(dword, ERF_DZ_RX_DESC_WPTR,
-rxq->added & rxq->ptr_mask);
-
-   /* DMA sync to device is not required */
-
-   /*
-* rte_write32() has rte_io_wmb() which guarantees that the STORE
-* operations (i.e. Rx and event descriptor updates) that precede
-* the rte_io_wmb() call are visible to NIC before the STORE
-* operations that follow it (i.e. doorbell write).
-*/
-   rte_write32(dword.ed_u32[0], rxq->doorbell);
-}
-
-static void
 sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
 {
const unsigned int ptr_mask = rxq->ptr_mask;
@@ -120,6 +91,8 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
void *objs[SFC_RX_REFILL_BULK];
unsigned int added = rxq->added;
 
+   RTE_BUILD_BUG_ON(SFC_RX_REFILL_BULK % SFC_EF10_RX_WPTR_ALIGN != 0);
+
free_space = rxq->max_fill_level - (added - rxq->completed);
 
if (free_space < rxq->refill_threshold)
@@ -178,7 +151,7 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
 
SFC_ASSERT(rxq->added != added);
rxq->added = added;
-   sfc_ef10_rx_qpush(rxq);
+   sfc_ef10_rx_qpush(rxq->doorbell, added, ptr_mask);
 }
 
 static void
-- 
2.7.4



[dpdk-dev] [PATCH 19/23] net/sfc/base: support MARK and FLAG actions in filters

2018-04-19 Thread Andrew Rybchenko
From: Roman Zhukov 

This patch adds support for DPDK rte_flow "MARK" and "FLAG" filter
actions to filters on EF10 family NICs.

Signed-off-by: Roman Zhukov 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/base/ef10_filter.c | 31 +++
 drivers/net/sfc/base/efx.h |  5 +
 drivers/net/sfc/base/efx_filter.c  | 21 +
 3 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/net/sfc/base/ef10_filter.c 
b/drivers/net/sfc/base/ef10_filter.c
index bf4992e..ae87285 100644
--- a/drivers/net/sfc/base/ef10_filter.c
+++ b/drivers/net/sfc/base/ef10_filter.c
@@ -172,7 +172,7 @@ efx_mcdi_filter_op_add(
__inout ef10_filter_handle_t *handle)
 {
efx_mcdi_req_t req;
-   uint8_t payload[MAX(MC_CMD_FILTER_OP_EXT_IN_LEN,
+   uint8_t payload[MAX(MC_CMD_FILTER_OP_V3_IN_LEN,
MC_CMD_FILTER_OP_EXT_OUT_LEN)];
efx_filter_match_flags_t match_flags;
efx_rc_t rc;
@@ -180,7 +180,7 @@ efx_mcdi_filter_op_add(
memset(payload, 0, sizeof (payload));
req.emr_cmd = MC_CMD_FILTER_OP;
req.emr_in_buf = payload;
-   req.emr_in_length = MC_CMD_FILTER_OP_EXT_IN_LEN;
+   req.emr_in_length = MC_CMD_FILTER_OP_V3_IN_LEN;
req.emr_out_buf = payload;
req.emr_out_length = MC_CMD_FILTER_OP_EXT_OUT_LEN;
 
@@ -316,16 +316,37 @@ efx_mcdi_filter_op_add(
spec->efs_ifrm_loc_mac, EFX_MAC_ADDR_LEN);
}
 
+   /*
+* Set the "MARK" or "FLAG" action for all packets matching this filter
+* if necessary (only useful with equal stride packed stream Rx mode
+* which provides the information in the pseudo-header).
+* These actions require MC_CMD_FILTER_OP_V3_IN msgrequest.
+*/
+   if ((spec->efs_flags & EFX_FILTER_FLAG_ACTION_MARK) &&
+   (spec->efs_flags & EFX_FILTER_FLAG_ACTION_FLAG)) {
+   rc = EINVAL;
+   goto fail3;
+   }
+   if (spec->efs_flags & EFX_FILTER_FLAG_ACTION_MARK) {
+   MCDI_IN_SET_DWORD(req, FILTER_OP_V3_IN_MATCH_ACTION,
+   MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_MARK);
+   MCDI_IN_SET_DWORD(req, FILTER_OP_V3_IN_MATCH_MARK_VALUE,
+   spec->efs_mark);
+   } else if (spec->efs_flags & EFX_FILTER_FLAG_ACTION_FLAG) {
+   MCDI_IN_SET_DWORD(req, FILTER_OP_V3_IN_MATCH_ACTION,
+   MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_FLAG);
+   }
+
efx_mcdi_execute(enp, &req);
 
if (req.emr_rc != 0) {
rc = req.emr_rc;
-   goto fail3;
+   goto fail4;
}
 
if (req.emr_out_length_used < MC_CMD_FILTER_OP_EXT_OUT_LEN) {
rc = EMSGSIZE;
-   goto fail4;
+   goto fail5;
}
 
handle->efh_lo = MCDI_OUT_DWORD(req, FILTER_OP_EXT_OUT_HANDLE_LO);
@@ -333,6 +354,8 @@ efx_mcdi_filter_op_add(
 
return (0);
 
+fail5:
+   EFSYS_PROBE(fail5);
 fail4:
EFSYS_PROBE(fail4);
 fail3:
diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h
index cd0e6f8..f5ec568 100644
--- a/drivers/net/sfc/base/efx.h
+++ b/drivers/net/sfc/base/efx.h
@@ -2622,6 +2622,10 @@ efx_tx_qdestroy(
 #defineEFX_FILTER_FLAG_RX  0x08
 /* Filter is for TX */
 #defineEFX_FILTER_FLAG_TX  0x10
+/* Set match flag on the received packet */
+#defineEFX_FILTER_FLAG_ACTION_FLAG 0x20
+/* Set match mark on the received packet */
+#defineEFX_FILTER_FLAG_ACTION_MARK 0x40
 
 typedef uint8_t efx_filter_flags_t;
 
@@ -2707,6 +2711,7 @@ typedef struct efx_filter_spec_s {
efx_oword_t efs_loc_host;
uint8_t efs_vni_or_vsid[EFX_VNI_OR_VSID_LEN];
uint8_t efs_ifrm_loc_mac[EFX_MAC_ADDR_LEN];
+   uint32_tefs_mark;
 } efx_filter_spec_t;
 
 
diff --git a/drivers/net/sfc/base/efx_filter.c 
b/drivers/net/sfc/base/efx_filter.c
index 97c972c..412298a 100644
--- a/drivers/net/sfc/base/efx_filter.c
+++ b/drivers/net/sfc/base/efx_filter.c
@@ -74,12 +74,33 @@ efx_filter_insert(
__inout efx_filter_spec_t *spec)
 {
const efx_filter_ops_t *efop = enp->en_efop;
+   efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
+   efx_rc_t rc;
 
EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_FILTER);
EFSYS_ASSERT3P(spec, !=, NULL);
EFSYS_ASSERT3U(spec->efs_flags, &, EFX_FILTER_FLAG_RX);
 
+   if ((spec->efs_flags & EFX_FILTER_FLAG_ACTION_MARK) &&
+   !encp->enc_filter_action_mark_supported) {
+   rc = ENOTSUP;
+   goto fail1;
+   }
+
+   if ((spec->efs_flags & EFX_FILTER_FLAG_ACTION_FLAG) &&
+   !encp->enc_filter_action_flag_supported) {
+   rc = ENOTSUP;
+   goto fail2;
+   }
+
return (efop->efo_add(enp, spec, B_FALSE));
+
+fail2:

[dpdk-dev] [PATCH 05/23] net/sfc/base: add equal stride super-buffer prefix layout

2018-04-19 Thread Andrew Rybchenko
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/sfc/base/efx_regs_ef10.h | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/sfc/base/efx_regs_ef10.h 
b/drivers/net/sfc/base/efx_regs_ef10.h
index 2cb96e8..968aaac 100644
--- a/drivers/net/sfc/base/efx_regs_ef10.h
+++ b/drivers/net/sfc/base/efx_regs_ef10.h
@@ -698,6 +698,21 @@ extern "C" {
 #defineES_DZ_PS_RX_PREFIX_ORIG_LEN_LBN 48
 #defineES_DZ_PS_RX_PREFIX_ORIG_LEN_WIDTH 16
 
+/* Equal stride super-buffer RX packet prefix (see SF-119419-TC) */
+#defineES_EZ_ESSB_RX_PREFIX_LEN 8
+#defineES_EZ_ESSB_RX_PREFIX_DATA_LEN_LBN 0
+#defineES_EZ_ESSB_RX_PREFIX_DATA_LEN_WIDTH 16
+#defineES_EZ_ESSB_RX_PREFIX_MARK_LBN 16
+#defineES_EZ_ESSB_RX_PREFIX_MARK_WIDTH 8
+#defineES_EZ_ESSB_RX_PREFIX_HASH_VALID_LBN 28
+#defineES_EZ_ESSB_RX_PREFIX_HASH_VALID_WIDTH 1
+#defineES_EZ_ESSB_RX_PREFIX_MARK_VALID_LBN 29
+#defineES_EZ_ESSB_RX_PREFIX_MARK_VALID_WIDTH 1
+#defineES_EZ_ESSB_RX_PREFIX_MATCH_FLAG_LBN 30
+#defineES_EZ_ESSB_RX_PREFIX_MATCH_FLAG_WIDTH 1
+#defineES_EZ_ESSB_RX_PREFIX_HASH_LBN 32
+#defineES_EZ_ESSB_RX_PREFIX_HASH_WIDTH 32
+
 /*
  * An extra flag for the packed stream mode,
  * signalling the start of a new buffer
-- 
2.7.4



[dpdk-dev] [PATCH 08/23] net/sfc: move EF10 Rx event parser to shared header

2018-04-19 Thread Andrew Rybchenko
Equal stride super-buffer Rx datapath will use it as well.

Signed-off-by: Andrew Rybchenko 
Reviewed-by: Ivan Malov 
---
 drivers/net/sfc/sfc_ef10_rx.c| 138 +---
 drivers/net/sfc/sfc_ef10_rx_ev.h | 164 +++
 2 files changed, 165 insertions(+), 137 deletions(-)
 create mode 100644 drivers/net/sfc/sfc_ef10_rx_ev.h

diff --git a/drivers/net/sfc/sfc_ef10_rx.c b/drivers/net/sfc/sfc_ef10_rx.c
index f8eb3c1..7560891 100644
--- a/drivers/net/sfc/sfc_ef10_rx.c
+++ b/drivers/net/sfc/sfc_ef10_rx.c
@@ -25,6 +25,7 @@
 #include "sfc_dp_rx.h"
 #include "sfc_kvargs.h"
 #include "sfc_ef10.h"
+#include "sfc_ef10_rx_ev.h"
 
 #define sfc_ef10_rx_err(dpq, ...) \
SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__)
@@ -198,143 +199,6 @@ sfc_ef10_rx_prepared(struct sfc_ef10_rxq *rxq, struct 
rte_mbuf **rx_pkts,
return n_rx_pkts;
 }
 
-static void
-sfc_ef10_rx_ev_to_offloads(const efx_qword_t rx_ev, struct rte_mbuf *m,
-  uint64_t ol_mask)
-{
-   uint32_t tun_ptype = 0;
-   /* Which event bit is mapped to PKT_RX_IP_CKSUM_* */
-   int8_t ip_csum_err_bit;
-   /* Which event bit is mapped to PKT_RX_L4_CKSUM_* */
-   int8_t l4_csum_err_bit;
-   uint32_t l2_ptype = 0;
-   uint32_t l3_ptype = 0;
-   uint32_t l4_ptype = 0;
-   uint64_t ol_flags = 0;
-
-   if (unlikely(EFX_TEST_QWORD_BIT(rx_ev, ESF_DZ_RX_PARSE_INCOMPLETE_LBN)))
-   goto done;
-
-   switch (EFX_QWORD_FIELD(rx_ev, ESF_EZ_RX_ENCAP_HDR)) {
-   default:
-   /* Unexpected encapsulation tag class */
-   SFC_ASSERT(false);
-   /* FALLTHROUGH */
-   case ESE_EZ_ENCAP_HDR_NONE:
-   break;
-   case ESE_EZ_ENCAP_HDR_VXLAN:
-   /*
-* It is definitely UDP, but we have no information
-* about IPv4 vs IPv6 and VLAN tagging.
-*/
-   tun_ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
-   break;
-   case ESE_EZ_ENCAP_HDR_GRE:
-   /*
-* We have no information about IPv4 vs IPv6 and VLAN tagging.
-*/
-   tun_ptype = RTE_PTYPE_TUNNEL_NVGRE;
-   break;
-   }
-
-   if (tun_ptype == 0) {
-   ip_csum_err_bit = ESF_DZ_RX_IPCKSUM_ERR_LBN;
-   l4_csum_err_bit = ESF_DZ_RX_TCPUDP_CKSUM_ERR_LBN;
-   } else {
-   ip_csum_err_bit = ESF_EZ_RX_IP_INNER_CHKSUM_ERR_LBN;
-   l4_csum_err_bit = ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_LBN;
-   if (unlikely(EFX_TEST_QWORD_BIT(rx_ev,
-   ESF_DZ_RX_IPCKSUM_ERR_LBN)))
-   ol_flags |= PKT_RX_EIP_CKSUM_BAD;
-   }
-
-   switch (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_ETH_TAG_CLASS)) {
-   case ESE_DZ_ETH_TAG_CLASS_NONE:
-   l2_ptype = (tun_ptype == 0) ? RTE_PTYPE_L2_ETHER :
-   RTE_PTYPE_INNER_L2_ETHER;
-   break;
-   case ESE_DZ_ETH_TAG_CLASS_VLAN1:
-   l2_ptype = (tun_ptype == 0) ? RTE_PTYPE_L2_ETHER_VLAN :
-   RTE_PTYPE_INNER_L2_ETHER_VLAN;
-   break;
-   case ESE_DZ_ETH_TAG_CLASS_VLAN2:
-   l2_ptype = (tun_ptype == 0) ? RTE_PTYPE_L2_ETHER_QINQ :
-   RTE_PTYPE_INNER_L2_ETHER_QINQ;
-   break;
-   default:
-   /* Unexpected Eth tag class */
-   SFC_ASSERT(false);
-   }
-
-   switch (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_L3_CLASS)) {
-   case ESE_DZ_L3_CLASS_IP4_FRAG:
-   l4_ptype = (tun_ptype == 0) ? RTE_PTYPE_L4_FRAG :
-   RTE_PTYPE_INNER_L4_FRAG;
-   /* FALLTHROUGH */
-   case ESE_DZ_L3_CLASS_IP4:
-   l3_ptype = (tun_ptype == 0) ? RTE_PTYPE_L3_IPV4_EXT_UNKNOWN :
-   RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
-   ol_flags |= PKT_RX_RSS_HASH |
-   ((EFX_TEST_QWORD_BIT(rx_ev, ip_csum_err_bit)) ?
-PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD);
-   break;
-   case ESE_DZ_L3_CLASS_IP6_FRAG:
-   l4_ptype = (tun_ptype == 0) ? RTE_PTYPE_L4_FRAG :
-   RTE_PTYPE_INNER_L4_FRAG;
-   /* FALLTHROUGH */
-   case ESE_DZ_L3_CLASS_IP6:
-   l3_ptype = (tun_ptype == 0) ? RTE_PTYPE_L3_IPV6_EXT_UNKNOWN :
-   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
-   ol_flags |= PKT_RX_RSS_HASH;
-   break;
-   case ESE_DZ_L3_CLASS_ARP:
-   /* Override Layer 2 packet type */
-   /* There is no ARP classification for inner packets */
-   if (tun_ptype == 0)
-   l2_ptype = RTE_PTYPE_L2_ETHER_ARP;
-   break;
-   default:
-   /* Unexpected Layer 3 class */
-   SFC_ASSERT(false);
-   

[dpdk-dev] [PATCH 13/23] net/sfc: support callback to check if mempool is supported

2018-04-19 Thread Andrew Rybchenko
The callback is a dummy yet since no Rx datapath provides its own
callback, so all pools are supported.

Signed-off-by: Andrew Rybchenko 
Reviewed-by: Ivan Malov 
---
 drivers/net/sfc/sfc_dp_rx.h  | 13 +
 drivers/net/sfc/sfc_ethdev.c | 16 
 2 files changed, 29 insertions(+)

diff --git a/drivers/net/sfc/sfc_dp_rx.h b/drivers/net/sfc/sfc_dp_rx.h
index db075dd..cb745e6 100644
--- a/drivers/net/sfc/sfc_dp_rx.h
+++ b/drivers/net/sfc/sfc_dp_rx.h
@@ -90,6 +90,18 @@ struct sfc_dp_rx_qcreate_info {
 typedef void (sfc_dp_rx_get_dev_info_t)(struct rte_eth_dev_info *dev_info);
 
 /**
+ * Test if an Rx datapath supports specific mempool ops.
+ *
+ * @param pool The name of the pool operations to test.
+ *
+ * @return Check status.
+ * @retval 0   Best mempool ops choice.
+ * @retval 1   Mempool ops are supported.
+ * @retval -ENOTSUPMempool ops not supported.
+ */
+typedef int (sfc_dp_rx_pool_ops_supported_t)(const char *pool);
+
+/**
  * Get size of receive and event queue rings by the number of Rx
  * descriptors and mempool configuration.
  *
@@ -182,6 +194,7 @@ struct sfc_dp_rx {
 #define SFC_DP_RX_FEAT_MULTI_PROCESS   0x2
 #define SFC_DP_RX_FEAT_TUNNELS 0x4
sfc_dp_rx_get_dev_info_t*get_dev_info;
+   sfc_dp_rx_pool_ops_supported_t  *pool_ops_supported;
sfc_dp_rx_qsize_up_rings_t  *qsize_up_rings;
sfc_dp_rx_qcreate_t *qcreate;
sfc_dp_rx_qdestroy_t*qdestroy;
diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c
index 700e154..c3f37bc 100644
--- a/drivers/net/sfc/sfc_ethdev.c
+++ b/drivers/net/sfc/sfc_ethdev.c
@@ -1630,6 +1630,21 @@ sfc_dev_filter_ctrl(struct rte_eth_dev *dev, enum 
rte_filter_type filter_type,
return -rc;
 }
 
+static int
+sfc_pool_ops_supported(struct rte_eth_dev *dev, const char *pool)
+{
+   struct sfc_adapter *sa = dev->data->dev_private;
+
+   /*
+* If Rx datapath does not provide callback to check mempool,
+* all pools are supported.
+*/
+   if (sa->dp_rx->pool_ops_supported == NULL)
+   return 1;
+
+   return sa->dp_rx->pool_ops_supported(pool);
+}
+
 static const struct eth_dev_ops sfc_eth_dev_ops = {
.dev_configure  = sfc_dev_configure,
.dev_start  = sfc_dev_start,
@@ -1678,6 +1693,7 @@ static const struct eth_dev_ops sfc_eth_dev_ops = {
.fw_version_get = sfc_fw_version_get,
.xstats_get_by_id   = sfc_xstats_get_by_id,
.xstats_get_names_by_id = sfc_xstats_get_names_by_id,
+   .pool_ops_supported = sfc_pool_ops_supported,
 };
 
 /**
-- 
2.7.4



  1   2   3   >