[dpdk-dev] [RFC PATCH] virtio: virtio ring layout optimization and vectorization rx

2015-09-18 Thread Huawei Xie
This single patch is for people to get familiar with the optimization and is 
for collecting feedback.
It isn't split into smaller patches because it is straightforward.
Haven't finished the cleanups.
The description and illustration of the idea is in a previous mail titled 
"virtio optimization idea".

---
 config/common_linuxapp  |   1 +
 drivers/net/virtio/Makefile |   2 +-
 drivers/net/virtio/virtio_ethdev.c  |  16 ++
 drivers/net/virtio/virtio_ethdev.h  |   5 +
 drivers/net/virtio/virtio_rxtx.c|  74 +-
 drivers/net/virtio/virtio_rxtx.h|  41 
 drivers/net/virtio/virtio_rxtx_simple.c | 383 
 drivers/net/virtio/virtqueue.h  |   4 +
 8 files changed, 522 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_rxtx.h
 create mode 100644 drivers/net/virtio/virtio_rxtx_simple.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..02b80cd 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -241,6 +241,7 @@ CONFIG_RTE_LIBRTE_ENIC_DEBUG=n
 # Compile burst-oriented VIRTIO PMD driver
 #
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+CONFIG_RTE_VIRTIO_SIMPLE=y
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 930b60f..89a8a37 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-
+SRCS-$(CONFIG_RTE_VIRTIO_SIMPLE) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..7b24b96 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -61,6 +61,7 @@
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"


 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
@@ -291,6 +292,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
dev->data->port_id, queue_idx);
vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
vq_size * sizeof(struct vq_desc_extra), 
RTE_CACHE_LINE_SIZE);
+   vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
+   (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * 
sizeof(vq->sw_ring[0]),
+   RTE_CACHE_LINE_SIZE, socket_id);
} else if (queue_type == VTNET_TQ) {
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
dev->data->port_id, queue_idx);
@@ -307,6 +311,10 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_LOG(ERR, "%s: Can not allocate virtqueue", __func__);
return (-ENOMEM);
}
+   if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
+   PMD_INIT_LOG(ERR, "%s: Can not allocate soft ring", __func__);
+   return -ENOMEM;
+   }

vq->hw = hw;
vq->port_id = dev->data->port_id;
@@ -1150,6 +1158,14 @@ rx_func_get(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
else
eth_dev->rx_pkt_burst = &virtio_recv_pkts;
+
+#ifdef RTE_VIRTIO_SIMPLE
+   if (!vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+   printf("use simple rxtx\n");
+   eth_dev->rx_pkt_burst = &virtio_recv_pkts_simple;
+   eth_dev->tx_pkt_burst = &virtio_xmit_pkts_simple;
+   }
+#endif
 }

 /*
diff --git a/drivers/net/virtio/virtio_ethdev.h 
b/drivers/net/virtio/virtio_ethdev.h
index 9026d42..e45e863 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -108,6 +108,11 @@ uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);

+uint16_t virtio_recv_pkts_simple(void *rx_queue, struct rte_mbuf **rx_pkts,
+uint16_t nb_pkts);
+
+uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
+uint16_t nb_pkts);

 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index c5b53bb..0ba1ea1 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -54,6 +54,7 @@
 #include "virtio_logs.h"
 #include "virtio_ethdev.h"
 #include "virtqueue.h"
+#include "virtio_rxtx.h"

 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(st

[dpdk-dev] IXGBE error statistics

2015-09-18 Thread Igor Ryzhov
Hello everyone.

Investigating IXGBE driver I found an mspdc counter (MAC Short Packet Discard). 
And I am wondering why this counter is not used in the calculation of total RX 
errors (ierrors field in rte_eth_stats structure). Is it already a part of 
another counter, for example, rlec (Receive Length Error)? Or is it a bug?

Another question is about the incompleteness of the rte_eth_stats structure. IXGBE 
and other drivers have a lot of counters but only a part of them is represented 
in rte_eth_stats. Are there any valid reasons for that, or is it just not 
implemented?

Best regards,
Igor Ryzhov


[dpdk-dev] vhost-net stops sending to virtio pmd -- already fixed?

2015-09-18 Thread Xie, Huawei
On 9/17/2015 1:25 AM, Kyle Larose wrote:
> Hi Huawei,
>
>> Kyle:
>> Could you tell us how did you produce this issue, very small pool size
>> or you are using pipeline model?
> If I understand correctly, by pipeline model you mean a model whereby
> multiple threads handle a given packet, with some sort IPC (e.g. dpdk
> rings) between them? If so, yes: we are using such a model. And I
> suspect that this model is where we run into issues: the length of the
> pipeline, combined with the queuing between stages, can lead to us
> exhausting the mbufs, particularly when a stage's load causes queuing.
Yes, exactly.
>
> When I initially ran into this issue, I had a fairly large mbuf pool
> (32K entries), with 3 stages in the pipeline: rx, worker, tx. There
> were two worker threads, with a total of 6 rings. I was sending some
> fairly bursty traffic, at a high packet rate (it was bursting up to
> around 1Mpkt/s). There was a low chance that this actually caused the
> problem. However, when I decreased the mbuf pool to 1000 entries, it
> *always* happened.
>
> In summary: the pipeline model is important here, and a small pool
> size definitely exacerbates the problem.
>
> I was able to reproduce the problem using the load_balancer sample
> application, though it required some modification to get it to run
> with virtio. I'm not sure if this is because I'm using DPDK 1.8,  or
> something else. Either way, I made the number of mbufs configurable
> via an environment variable, and was able to show that decreasing it
> from the default of 32K to 1K would cause the problem to always happen
> when using the same traffic as with my application. Applying the below
> patch fixed the problem.
>
> The following patch seems to fix the problem for me, though I'm not
> sure it's the optimal solution. It does so by removing the early exit
> which prevents us from allocating mbufs. After we skip over the packet
> processing loop since there are no packets, the mbuf allocation loop
> runs.  Note that the patch is on dpdk 1.8.
Yes, it will fix your problem. We could try to do the refill each time
we enter the loop, regardless of whether there are available packets or not.

> diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c
> b/lib/librte_pmd_virtio/virtio_rxtx.c
> index c013f97..7cadf52 100644
> --- a/lib/librte_pmd_virtio/virtio_rxtx.c
> +++ b/lib/librte_pmd_virtio/virtio_rxtx.c
> @@ -463,9 +463,6 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf
> **rx_pkts, uint16_t nb_pkts)
> if (likely(num > DESC_PER_CACHELINE))
> num = num - ((rxvq->vq_used_cons_idx + num) %
> DESC_PER_CACHELINE);
>
> -   if (num == 0)
> -   return 0;
> -
> num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
> PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
> for (i = 0; i < num ; i++) {
> @@ -549,9 +546,6 @@ virtio_recv_mergeable_pkts(void *rx_queue,
>
> rmb();
>
> -   if (nb_used == 0)
> -   return 0;
> -
> PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
>
> while (i < nb_used) {
>
> Thanks,
>
> Kyle
>



[dpdk-dev] vhost-net stops sending to virtio pmd -- already fixed?

2015-09-18 Thread Xie, Huawei
On 9/14/2015 5:44 AM, Thomas Monjalon wrote:
> Hi,
>
> 2015-09-11 12:32, Kyle Larose:
>> Looking through the version tree for virtio_rxtx.c, I saw the following
>> commit:
>>
>> http://dpdk.org/browse/dpdk/commit/lib/librte_pmd_virtio?id=8c09c20fb4cde76e53d87bd50acf2b441ecf6eb8
>>
>> Does anybody know offhand if the issue fixed by that commit could be the
>> root cause of what I am seeing?
> I won't have the definitive answer but I would like to use your question
> to highlight a common issue in git messages:
>
> PLEASE, authors of fixes, explain the bug you are fixing and how it can
> be reproduced. Good commit messages are REALLY read and useful.
Thomas:
Thanks for the reminder. I am not the author of this specific patch, :). We
will try to keep commit messages simple, sufficient and useful.
In my opinion, this commit message is totally ok. It doesn't fix
anything but removes some unnecessary ring update operations.

>
> Thanks
>
>



[dpdk-dev] IXGBE error statistics

2015-09-18 Thread Van Haaren, Harry
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Igor Ryzhov
> Hello everyone.

Hi Igor,

> Investigating IXGBE driver I found an mspdc counter (MAC Short Packet
> Discard). And I am wondering why this counter is not used in the calculation
> of total RX errors (ierrors field in rte_eth_stats structure). Is it already 
> a part
> of another counter, for example, rlec (Receive Length Error)? Or is it a bug?

There has been a discussion on list recently involving ixgbe stats, and certain
packets triggering multiple stats registers - the datasheet doesn't mention
this could be the case for the mspdc register, I will research this issue and
get back to you.

> Another one question is about incompleteness of rte_eth_stats structure.
> IXGBE and other drivers have a lot of counters but only a part of them is
> represented in rte_eth_stats. Is there any valuable reasons for that or it's
> just not implemented?

The rte_eth_stats struct presents the most general statistics that every NIC 
exposes.
In 2.1, an extended statistics API was added which allows NICs to expose stats
that are unique to that NIC. Currently ixgbe is the only driver that has the 
xstats API
implemented, I am working on patches to implement the functionality for the 
other
Intel drivers.

As part of testing the xstats implementation for each driver, I can test the 
exact
behavior of the mspdc counter, and if it is mis-counted this should become 
clear.

Cheers, -Harry


[dpdk-dev] [PATCH v2 0/5] Virtual PMD using sze2 layer for COMBO cards

2015-09-18 Thread Matej Vido
This is virtual PMD which communicates with COMBO-80G and COMBO-100G
cards through sze2 layer. Communication with COMBO card is managed
through interface provided by libsze2 library and kernel modules
(combov3, szedata2_cv3).

To compile and use the PMD, it is necessary to have the libsze2 library installed and
the kernel modules (combov3, szedata2_cv3) loaded.
Therefore, in the default configuration, PMD compilation is disabled. To compile
the szedata2 PMD, it is necessary to set CONFIG_RTE_LIBRTE_PMD_SZEDATA2=y.

v2:
code cleanup
add handling scattered packets
update release notes

Matej Vido (5):
  szedata2: add new poll mode driver
  szedata2: add handling of scattered packets in RX
  szedata2: add handling of scattered packets in TX
  doc: add documentation for szedata2 PMD
  doc: update 2.2 release notes

 config/common_bsdapp  |5 +
 config/common_linuxapp|5 +
 doc/guides/nics/index.rst |1 +
 doc/guides/nics/szedata2.rst  |  105 ++
 doc/guides/prog_guide/source_org.rst  |1 +
 doc/guides/rel_notes/release_2_2.rst  |4 +
 drivers/net/Makefile  |1 +
 drivers/net/szedata2/Makefile |   62 +
 drivers/net/szedata2/rte_eth_szedata2.c   | 1638 +
 drivers/net/szedata2/rte_eth_szedata2.h   |   96 ++
 drivers/net/szedata2/rte_pmd_szedata2_version.map |4 +
 mk/rte.app.mk |3 +
 12 files changed, 1925 insertions(+)
 create mode 100644 doc/guides/nics/szedata2.rst
 create mode 100644 drivers/net/szedata2/Makefile
 create mode 100644 drivers/net/szedata2/rte_eth_szedata2.c
 create mode 100644 drivers/net/szedata2/rte_eth_szedata2.h
 create mode 100644 drivers/net/szedata2/rte_pmd_szedata2_version.map

-- 
1.9.1



[dpdk-dev] [PATCH v2 1/5] szedata2: add new poll mode driver

2015-09-18 Thread Matej Vido
Add virtual PMD which communicates with COMBO cards through sze2
layer using libsze2 library.

Since link_speed is uint16_t, the value for 100G speed cannot be
represented, therefore link_speed is set to ETH_LINK_SPEED_10G until the
type of link_speed is resolved.

v2:
Code cleanup.
Fix error handling by initialization of rx, tx dma channels.
Add uninit function.

Signed-off-by: Matej Vido 
Reviewed-by: Jan Viktorin 
---
 config/common_bsdapp  |5 +
 config/common_linuxapp|5 +
 drivers/net/Makefile  |1 +
 drivers/net/szedata2/Makefile |   62 ++
 drivers/net/szedata2/rte_eth_szedata2.c   | 1212 +
 drivers/net/szedata2/rte_eth_szedata2.h   |   96 ++
 drivers/net/szedata2/rte_pmd_szedata2_version.map |4 +
 mk/rte.app.mk |3 +
 8 files changed, 1388 insertions(+)
 create mode 100644 drivers/net/szedata2/Makefile
 create mode 100644 drivers/net/szedata2/rte_eth_szedata2.c
 create mode 100644 drivers/net/szedata2/rte_eth_szedata2.h
 create mode 100644 drivers/net/szedata2/rte_pmd_szedata2_version.map

diff --git a/config/common_bsdapp b/config/common_bsdapp
index b37dcf4..d2b6f4f 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -272,6 +272,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=y

 #
+# Compile software PMD backed by SZEDATA2 device
+#
+CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n
+
+#
 # Compile link bonding PMD library
 #
 CONFIG_RTE_LIBRTE_PMD_BOND=y
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 0de43d5..f5963e7 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -270,6 +270,11 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16
 CONFIG_RTE_LIBRTE_PMD_PCAP=n

 #
+# Compile software PMD backed by SZEDATA2 device
+#
+CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n
+
+#
 # Compile link bonding PMD library
 #
 CONFIG_RTE_LIBRTE_PMD_BOND=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 5ebf963..7499d1e 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -48,6 +48,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2

 include $(RTE_SDK)/mk/rte.sharelib.mk
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/net/szedata2/Makefile b/drivers/net/szedata2/Makefile
new file mode 100644
index 000..c3c42e5
--- /dev/null
+++ b/drivers/net/szedata2/Makefile
@@ -0,0 +1,62 @@
+#   BSD LICENSE
+#
+#   Copyright (c) 2015 CESNET
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of CESNET nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_szedata2.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pmd_szedata2_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += rte_eth_szedata2.c
+
+#
+# Export include files
+#
+SYMLINK-y-include +=
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += lib/librte_malloc
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/szed

[dpdk-dev] [PATCH v2 2/5] szedata2: add handling of scattered packets in RX

2015-09-18 Thread Matej Vido
Add new RX function for handling scattered packets.

Signed-off-by: Matej Vido 
Reviewed-by: Jan Viktorin 
---
 drivers/net/szedata2/rte_eth_szedata2.c | 356 +++-
 1 file changed, 354 insertions(+), 2 deletions(-)

diff --git a/drivers/net/szedata2/rte_eth_szedata2.c 
b/drivers/net/szedata2/rte_eth_szedata2.c
index 4db1287..ddb45e4 100644
--- a/drivers/net/szedata2/rte_eth_szedata2.c
+++ b/drivers/net/szedata2/rte_eth_szedata2.c
@@ -362,6 +362,343 @@ eth_szedata2_rx(void *queue,
 }

 static uint16_t
+eth_szedata2_rx_scattered(void *queue,
+   struct rte_mbuf **bufs,
+   uint16_t nb_pkts)
+{
+   unsigned int i;
+   struct rte_mbuf *mbuf;
+   struct szedata2_rx_queue *sze_q = queue;
+   struct rte_pktmbuf_pool_private *mbp_priv;
+   uint16_t num_rx = 0;
+   uint16_t buf_size;
+   uint16_t sg_size;
+   uint16_t hw_size;
+   uint16_t packet_size;
+   uint64_t num_bytes = 0;
+   struct szedata * sze = sze_q->sze;
+   uint8_t * header_ptr = NULL; /* header of packet */
+   uint8_t * packet_ptr1 = NULL;
+   uint8_t * packet_ptr2 = NULL;
+   uint16_t packet_len1 = 0;
+   uint16_t packet_len2 = 0;
+   uint16_t hw_data_align;
+
+   if (unlikely(sze_q->sze == NULL || nb_pkts == 0)) {
+   return 0;
+   }
+
+   /*
+* Reads the given number of packets from szedata2 channel given
+* by queue and copies the packet data into a newly allocated mbuf
+* to return.
+*/
+   for (i = 0; i < nb_pkts; i++) {
+   const struct szedata_lock * ct_rx_lck_backup;
+   unsigned int ct_rx_rem_bytes_backup;
+   unsigned char * ct_rx_cur_ptr_backup;
+
+   /* get the next sze packet */
+   if (sze->ct_rx_lck != NULL && !sze->ct_rx_rem_bytes &&
+   sze->ct_rx_lck->next == NULL) {
+   /* unlock old data */
+   szedata_rx_unlock_data(sze_q->sze, sze->ct_rx_lck_orig);
+   sze->ct_rx_lck_orig = NULL;
+   sze->ct_rx_lck = NULL;
+   }
+
+   /*
+* Store items from sze structure which can be changed
+* before mbuf allocating. Use these items in case of mbuf
+* allocating failure.
+*/
+   ct_rx_lck_backup = sze->ct_rx_lck;
+   ct_rx_rem_bytes_backup = sze->ct_rx_rem_bytes;
+   ct_rx_cur_ptr_backup = sze->ct_rx_cur_ptr;
+
+   if (!sze->ct_rx_rem_bytes && sze->ct_rx_lck_orig == NULL) {
+   /* nothing to read, lock new data */
+   sze->ct_rx_lck_orig = sze->ct_rx_lck =
+   szedata_rx_lock_data(sze_q->sze, ~0U);
+
+   /*
+* Backup items from sze structure must be updated
+* after locking to contain pointers to new locks.
+*/
+   ct_rx_lck_backup = sze->ct_rx_lck;
+   ct_rx_rem_bytes_backup = sze->ct_rx_rem_bytes;
+   ct_rx_cur_ptr_backup = sze->ct_rx_cur_ptr;
+
+   if (sze->ct_rx_lck == NULL) {
+   /* nothing to lock */
+   break;
+   }
+
+   sze->ct_rx_cur_ptr = sze->ct_rx_lck->start;
+   sze->ct_rx_rem_bytes = sze->ct_rx_lck->len;
+
+   if (!sze->ct_rx_rem_bytes) {
+   break;
+   }
+   }
+
+   if (sze->ct_rx_rem_bytes < RTE_SZE2_PACKET_HEADER_SIZE) {
+   /*
+* cut in header - copy parts of header to merge buffer
+*/
+   if (sze->ct_rx_lck->next == NULL) {
+   break;
+   }
+
+   /* copy first part of header */
+   rte_memcpy(sze->ct_rx_buffer, sze->ct_rx_cur_ptr,
+   sze->ct_rx_rem_bytes);
+
+   /* copy second part of header */
+   sze->ct_rx_lck = sze->ct_rx_lck->next;
+   sze->ct_rx_cur_ptr = sze->ct_rx_lck->start;
+   rte_memcpy(sze->ct_rx_buffer + sze->ct_rx_rem_bytes,
+   sze->ct_rx_cur_ptr,
+   RTE_SZE2_PACKET_HEADER_SIZE -
+   sze->ct_rx_rem_bytes);
+
+   sze->ct_rx_cur_ptr += RTE_SZE2_PACKET_HEADER_SIZE -
+   sze->ct_rx_rem_bytes;
+   sze->ct_rx_rem_bytes = sze->ct_rx_lck->len -
+   RTE_SZE2_PACKET_HEADER_SIZE +
+   sze->ct_rx_rem_bytes;
+
+  

[dpdk-dev] [PATCH v2 3/5] szedata2: add handling of scattered packets in TX

2015-09-18 Thread Matej Vido
TX function modified to handle chained mbufs.

Signed-off-by: Matej Vido 
Reviewed-by: Jan Viktorin 
---
 drivers/net/szedata2/rte_eth_szedata2.c | 108 +++-
 1 file changed, 91 insertions(+), 17 deletions(-)

diff --git a/drivers/net/szedata2/rte_eth_szedata2.c 
b/drivers/net/szedata2/rte_eth_szedata2.c
index ddb45e4..e2d6501 100644
--- a/drivers/net/szedata2/rte_eth_szedata2.c
+++ b/drivers/net/szedata2/rte_eth_szedata2.c
@@ -737,7 +737,7 @@ eth_szedata2_tx(void *queue,
 next_packet:
mbuf = bufs[nb_pkts - pkt_left];

-   pkt_len = mbuf->data_len;
+   pkt_len = mbuf->pkt_len;
mbuf_segs = mbuf->nb_segs;

hwpkt_len = RTE_SZE2_PACKET_HEADER_SIZE_ALIGNED +
@@ -764,9 +764,28 @@ next_packet:
/* copy packet from mbuf */
tmp_dst = ((uint8_t *)(dst)) +
RTE_SZE2_PACKET_HEADER_SIZE_ALIGNED;
-   rte_memcpy(tmp_dst,
-   rte_pktmbuf_mtod(mbuf, const void *),
-   pkt_len);
+   if (likely(mbuf_segs == 1)) {
+   /*
+* non-scattered packet,
+* transmit from one mbuf
+*/
+   rte_memcpy(tmp_dst,
+   rte_pktmbuf_mtod(mbuf, const void *),
+   pkt_len);
+   } else {
+   /* scattered packet, transmit from more mbufs */
+   struct rte_mbuf * m = mbuf;
+   while (m) {
+   rte_memcpy(tmp_dst,
+   rte_pktmbuf_mtod(m,
+   const void *),
+   m->data_len);
+   tmp_dst = ((uint8_t *)(tmp_dst)) +
+   m->data_len;
+   m = m->next;
+   }
+   }
+

dst = ((uint8_t *)dst) + hwpkt_len;
unlock_size += hwpkt_len;
@@ -805,19 +824,74 @@ next_packet:

tmp_dst = ((uint8_t *)(dst)) +
RTE_SZE2_PACKET_HEADER_SIZE_ALIGNED;
-   /* copy part of packet to first area */
-   rte_memcpy(tmp_dst,
-   rte_pktmbuf_mtod(mbuf, const void *),
-   write_len);
-
-   if (lck->next)
-   dst = lck->next->start;
-
-   /* copy part of packet to second area */
-   rte_memcpy(dst,
-   (const void *) (rte_pktmbuf_mtod(mbuf,
-   const uint8_t *) +
-   write_len), pkt_len - write_len);
+   if (likely(mbuf_segs == 1)) {
+   /*
+* non-scattered packet,
+* transmit from one mbuf
+*/
+   /* copy part of packet to first area */
+   rte_memcpy(tmp_dst,
+   rte_pktmbuf_mtod(mbuf, const void *),
+   write_len);
+
+   if (lck->next)
+   dst = lck->next->start;
+
+   /* copy part of packet to second area */
+   rte_memcpy(dst,
+   (const void *) (rte_pktmbuf_mtod(mbuf,
+   const uint8_t *) +
+   write_len), pkt_len - write_len);
+   } else {
+   /* scattered packet, transmit from more mbufs */
+   struct rte_mbuf * m = mbuf;
+   uint16_t written = 0;
+   uint16_t to_write = 0;
+   bool new_mbuf = true;
+   uint16_t write_off = 0;
+
+   /* copy part of packet to first area */
+   while (m && written < write_len) {
+   to_write = RTE_MIN(m->data_len,
+   write_len - written);
+   rte_memcpy(tmp_dst,
+   rte_pktmbuf_mtod(m,
+   con

[dpdk-dev] [PATCH v2 4/5] doc: add documentation for szedata2 PMD

2015-09-18 Thread Matej Vido
Signed-off-by: Matej Vido 
Reviewed-by: Jan Viktorin 
---
 doc/guides/nics/index.rst|   1 +
 doc/guides/nics/szedata2.rst | 105 +++
 doc/guides/prog_guide/source_org.rst |   1 +
 3 files changed, 107 insertions(+)
 create mode 100644 doc/guides/nics/szedata2.rst

diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index d1a92f8..fcbf8dd 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -45,6 +45,7 @@ Network Interface Controller Drivers
 ixgbe
 intel_vf
 mlx4
+szedata2
 virtio
 vmxnet3
 pcap_ring
diff --git a/doc/guides/nics/szedata2.rst b/doc/guides/nics/szedata2.rst
new file mode 100644
index 000..05864c5
--- /dev/null
+++ b/doc/guides/nics/szedata2.rst
@@ -0,0 +1,105 @@
+..  BSD LICENSE
+Copyright 2015 CESNET
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+* Neither the name of CESNET nor the names of its
+contributors may be used to endorse or promote products derived
+from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+SZEDATA2 PMD
+
+
+SZEDATA2 PMD is virtual PMD which uses sze2 layer to communicate with COMBO
+cards (COMBO-80G, COMBO-100G) using interface provided by libsze2 library.
+
+.. note::
+
+   This driver has external dependencies. Therefore it is disabled in default
+   configuration files. It can be enabled by setting 
CONFIG_RTE_LIBRTE_PMD_SZEDATA2=y
+   and recompiling.
+
+Prerequisities
+--
+
+This PMD requires kernel modules which are responsible for initialization and
+allocation of resources needed for sze2 layer function. Communication between
+PMD and kernel modules is mediated by libsze2 library. These kernel modules and
+library are not part of DPDK and must be installed separately:
+
+- **libsze2**
+
+  This library provides API for initialization of sze2 transfers, receiving and
+  transmitting data segments.
+
+- **Kernel modules**
+
+  These kernel modules manage initialization of hardware, allocation and 
sharing
+  of resources for user space applications:
+
+  - combov3
+  - szedata2_cv3
+
+Using PMD
+-
+
+SZEDATA2 PMD can be created by passing --vdev= option to EAL in the following
+format:
+
+.. code-block:: console
+
+--vdev 
'DEVICE_NAME,dev_path=PATH_TO_SZEDATA2_DEVICE,rx_ifaces=RX_MASK,tx_ifaces=TX_MASK'
+
+DEVICE_NAME and options dev_path, rx_ifaces, tx_ifaces are mandatory and must
+be separated by commas.
+
+*   DEVICE_NAME: contains prefix eth_szedata2 followed by numbers or letters,
+must be unique for each virtual device
+
+*   dev_path: Defines path to szedata2 device.
+Value is valid path to szedata2 device.
+
+dev_path=/dev/szedataII0
+
+*   rx_ifaces: Defines which receive channels will be used.
+For each channel is created one queue. Value is mask for selecting which
+receive channels are required.
+
+rx_ifaces=0x3
+
+*   tx_ifaces: Defines which transmit channels will be used.
+For each channel is created one queue. Value is mask for selecting which
+transmit channels are required.
+
+tx_ifaces=0x3
+
+Example of usage
+
+
+Read packets from 0. and 1. receive channel and write them to 0. and 1. 
transmit
+channel
+
+.. code-block:: console
+
+$RTE_TARGET/app/testpmd -c 0xf -n 2 --vdev 
'eth_szedata20,dev_path=/dev/szedataII0,rx_ifaces=0x3,tx_ifaces=0x3' -- 
--port-topology=chained --rxq=2 --txq=2 --nb-cores=2
diff --git a/doc/guides/prog_guide/source_org.rst 
b/doc/guides/prog_guide/source_org.rst
index ae11b3b..2393002 100644
--- a/doc/guides/prog_guide/source_org.rst
+++ b/doc/guides/prog_guide/source_org.rst
@@ 

[dpdk-dev] [PATCH v2 5/5] doc: update 2.2 release notes

2015-09-18 Thread Matej Vido
Add szedata2 PMD to 2.2 release notes.

Signed-off-by: Matej Vido 
Reviewed-by: Jan Viktorin 
---
 doc/guides/rel_notes/release_2_2.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_2_2.rst 
b/doc/guides/rel_notes/release_2_2.rst
index 682f468..c78f94d 100644
--- a/doc/guides/rel_notes/release_2_2.rst
+++ b/doc/guides/rel_notes/release_2_2.rst
@@ -4,6 +4,10 @@ DPDK Release 2.2
 New Features
 

+* **Added virtual szedata2 driver for COMBO cards.**
+
+  Added virtual PMD for COMBO-100G and COMBO-80G cards.
+  PMD is disabled in default configuration.

 Resolved Issues
 ---
-- 
1.9.1



[dpdk-dev] Reg: NIC82599 is not recovering once it stalls.

2015-09-18 Thread bharath paulraj
Hi Folks,

While doing some implementation with dpdk-1.7.0 on NIC 82599, I am facing
an issue. If I am sending packets to the NIC at line rate and the
application which I developed using DPDK is not processing the packets
fast enough, then the NIC starts to drop packets (of course, this is
expected) and does not recover, even if I reduce the rate to a few packets per
second, which is not expected.

Am I missing something in my application?

-- 
Regards,
Bharath


[dpdk-dev] rte_eth_rx_queue_count accuracy

2015-09-18 Thread Alejandro Lucero
I have seen the API definition says nothing about accuracy but some PMD
implementations sacrifice accuracy for the sake of performance. If I'm not
understanding the code wrongly  i40e and ixgbe check DD bit just for the
first descriptor in a group of 4, and they take all of them as used if the
first descriptor is used.

On the other hand, they do a "heavy" calculation when the descriptor ring wraps
which does not make sense (to me) if the same performance goal is used.

There are PMDs not supporting this option and I can not see any app or
example using it so I do not know how important is this function, its
accuracy and its performance impact. Can someone comment on this?

Thanks


[dpdk-dev] libdpdk upstream changes for ecosystem best practices

2015-09-18 Thread Robie Basak
Hi Thomas,

On Wed, Sep 02, 2015 at 04:18:33PM +0200, Thomas Monjalon wrote:
> > First, it would be easier for us to ship a single binary package that
> > ships a single shared library to cover all of DPDK that library
> > consumers might need, rather than having it split up as you do. I
> > understand the build system is capable of doing this already, but what
> > we don?t have is a well defined soname and sover (currently
> > parameterized in the build) for ABI compatibility purposes. As a binary
> 
> No it is now fixed:
>   http://dpdk.org/browse/dpdk/commit/?id=c3ce2ad3548

It's great that the name "dpdk" is pinned down - thanks. But we need to
define the sover also, and make sure it is bumped when the ABI changes.
AIUI the build currently produces no sover - is this correct?

We'll use a sover of 0 in our packaging for now, unless you object. Then
we'll be able to move up to whatever you do when it is well-defined.

> > So that we can get DPDK packaging into Ubuntu immediately, please could
> > we agree to define (and burn) libdpdk.so.0 to be the ABI that builds
> > with upstream release 2.0.0 when built with the native-linuxapp-gcc
> > template options plus the following changes:
> > CONFIG_RTE_MACHINE=?default?
> > CONFIG_RTE_APP_TEST=n
> > CONFIG_LIBRTE_VHOST=y
> > CONFIG_RTE_EAL_IGB_UIO=n
> > CONFIG_RTE_LIBRTE_KNI=n
> > CONFIG_RTE_BUILD_COMBINE_LIBS=y
> > CONFIG_RTE_BUILD_SHARED_LIB=y
> 
> I feel this configuration is the responsibility of the distribution.
> What do you expect to have in the source project?

I just wanted to make it clear what we were doing in case changing build
configuration parameters resulted in a different ABI. If this isn't the
case, then that's fine - it is solely the consider of the distribution
as to what build parameters we pick.

> > The combined library would be placed into /usr/lib/$(ARCH)-linux-gnu/
> > where it can be found without modification to the library search path.
> > We want to ship it like this in Ubuntu anyway, but I?d prefer upstream
> > to have defined it as such since then we?ll have a proper definition of
> > the ABI that can be shared across distributions and other consumers any
> > time ABI compatibility is expected.
> 
> You mean you target ABI compatibility between Linux distributons?
> But other libraries could have different versions so you would be lucky
> to have a binary application finding the same dependencies.

In theory we do get ABI compatibility between distributions. Finding the
dependencies is a separate issue; but if the right binaries were
installed, there would be no conflicts in finding shared libraries
across binaries from different distributions if the ABI is managed
right.

But that isn't directly our target.

It's still useful to us to have this done right. It makes ABI
transitions in the distribution (coordinating updates to libraries and
their consumers concurrently) possible without breaking things in the
middle. It means that when we talk to upstreams (both libraries and
their consumers) then we're speaking the same language as other
distributions, and patches apply to them all without each distribution
having to kludge things independently. And it gives us options when
different library consumers require different ABI versions since we can
concurrently install two different ABIs of the same library (although we
prefer to avoid that).

> > Though not strictly part of a shared library ABI, I also propose some
> > build-related upstream changes at API level below, that I?d like to also
> > ship in the initial Ubuntu packaging of the header files. Clearly you
> > cannot make this change in an existing release, but I propose that you
> > do this for your next release so all library consumers will see a
> > consistent and standard API interface. If you agree to this, then I?d
> > also like to ship the Ubuntu package with patches to do the same thing
> > in your current release.
> 
> Yes cleanup patches are welcome :)

I'm arranging to have someone work on these with you upstream and send
you patches, thanks.

Robie


[dpdk-dev] ksoftirqd when using KNI

2015-09-18 Thread Moon-Sang Lee
I'm a newbie and testing DPDK KNI with 1G intel NIC.

According to my understanding of DPDK documents,
KNI should not raise interrupts when sending/receiving packets.

But when I transmit bunch of packets to my KNI ports,
'top command' shows ksoftirqd with 50% CPU load.

Would you give me some comments about this situation?



-- 
Moon-Sang Lee, SW Engineer
Email: sang0627 at gmail.com
Wisdom begins in wonder. *Socrates*


[dpdk-dev] IXGBE error statistics

2015-09-18 Thread Igor Ryzhov
Hello, Harry.

Thank you, I'll wait for result of mspdc testing.

About rte_eth_stats - I found that not generic fields of the structure are all 
deprecated already. I will research xstats API, thank you.

Best regards,
Igor

> 18 . 2015 ?., ? 11:04, Van Haaren, Harry  
> ???(?):
> 
>> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Igor Ryzhov
>> Hello everyone.
> 
> Hi Igor,
> 
>> Investigating IXGBE driver I found an mspdc counter (MAC Short Packet
>> Discard). And I am wondering why this counter is not used in the calculation
>> of total RX errors (ierrors field in rte_eth_stats structure). Is it already 
>> a part
>> of another counter, for example, rlec (Receive Length Error)? Or is it a bug?
> 
> There has been a discussion on list recently involving ixgbe stats, and 
> certain
> packets triggering multiple stats registers - the datasheet doesn't mention
> this could be the case for the mspdc register, I will research this issue and
> get back to you.
> 
>> Another one question is about incompleteness of rte_eth_stats structure.
>> IXGBE and other drivers have a lot of counters but only a part of them is
>> represented in rte_eth_stats. Is there any valuable reasons for that or it's
>> just not implemented?
> 
> The rte_eth_stats struct presents the most general statistics that every NIC 
> exposes.
> In 2.1, and extended statistics API was added which allows NICs to expose 
> stats
> that are unique to that NIC. Currently ixgbe is the only driver that has the 
> xstats API
> implemented, I am working on patches to implement the functionality for the 
> other
> Intel drivers.
> 
> As part of testing the xstats implementation for each driver, I can test the 
> exact
> behavior of the mspdc counter, and if it is mis-counted this should become 
> clear.
> 
> Cheers, -Harry



[dpdk-dev] [PATCH v5 00/12] vhost-user multiple queues enabling

2015-09-18 Thread Yuanhan Liu
This patch set enables vhost-user multiple queues.

Overview


It depends on some QEMU patches that, hopefully, will be merged soon.
Those qemu patches introduce some new vhost-user messages, for vhost-user
mq enabling negotiation. Here is the main negotiation steps (Qemu
as master, and DPDK vhost-user as slave):

- Master queries features by VHOST_USER_GET_FEATURES from slave

- Check if VHOST_USER_F_PROTOCOL_FEATURES exist. If not, mq is not
  supported. (check patch 1 for why VHOST_USER_F_PROTOCOL_FEATURES
  is introduced)

- Master then sends another command, VHOST_USER_GET_QUEUE_NUM, for
  querying how many queues the slave supports.

  Master will compare the result with the requested queue number.
  Qemu exits if the former is smaller.

- Master then tries to initiate all queue pairs by sending some vhost
  user commands, including VHOST_USER_SET_VRING_CALL, which will
  trigger the slave to do related vring setup, such as vring allocation.


Till now, all necessary initiation and negotiation are done. And master
could send another message, VHOST_USER_SET_VRING_ENABLE, to enable/disable
a specific queue dynamically later.


Patchset


Patch 1-7 are all prepare works for enabling mq; they are all atomic
changes, which is designed to not break anything.

Patch 8 actually enables the mq feature, by setting two key feature flags.

Patch 9-12 are for demonstrating the mq feature.


Testing
===

Host side
--

- # Start vhost-switch

  sudo mount -t hugetlbfs nodev /mnt/huge
  sudo modprobe uio
  sudo insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko

  sudo $RTE_SDK/tools/dpdk_nic_bind.py --bind igb_uio :08:00.0

  sudo $RTE_SDK/examples/vhost/build/vhost-switch -c 0xf0 -n 4 \
   --huge-dir /mnt/huge --socket-mem 2048,0 -- -p 1 --vm2vm 0  \
   --dev-basename usvhost --rxq 2

  # The above command generates a usvhost socket file at PWD. You could also
  # specify the "--stats 1" option to enable stats dumping.



- # start qemu


  sudo sudo mount -t hugetlbfs nodev $HOME/hugetlbfs
  $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 -machine accel=kvm -m 4G \
-object 
memory-backend-file,id=mem,size=4G,mem-path=$HOME/hugetlbfs,share=on \
-numa node,memdev=mem -chardev socket,id=chr0,path=/path/to/usvhost \
-netdev vhost-user,id=net0,chardev=chr0,vhostforce,queues=2 \
-device 
virtio-net-pci,netdev=net0,mq=on,vectors=6,mac=52:54:00:12:34:58,csum=off,gso=off,guest_tso4=off,guest_tso6=off,guest_ecn=off
 \
-hda $HOME/iso/fc-22-x86_64.img -smp 10 -cpu 
core2duo,+sse3,+sse4.1,+sse4.2


Guest side
--

   modprobe uio
   insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko
   echo 1024 > 
/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
   ./tools/dpdk_nic_bind.py --bind igb_uio 00:03.0

   $RTE_SDK/$RTE_TARGET/app/testpmd -c 1f -n 4 -- --rxq=2 --txq=2 \
--nb-cores=4 -i --disable-hw-vlan --txqflags 0xf00

   > set fwd mac
   > start tx_first


After those setups, you then could use packet generator for packet tx/rx 
testing.

---
Changchun Ouyang (7):
  vhost: rxtx: prepare work for multiple queue support
  vhost: add VHOST_USER_SET_VRING_ENABLE message
  virtio: resolve for control queue
  vhost: add API bind a virtq to a specific core
  ixgbe: support VMDq RSS in non-SRIOV environment
  examples/vhost: demonstrate the usage of vhost mq feature
  examples/vhost: add per queue stats

Yuanhan Liu (5):
  vhost-user: add protocol features support
  vhost-user: add VHOST_USER_GET_QUEUE_NUM message
  vhost: vring queue setup for multiple queue support
  vhost-user: handle VHOST_USER_RESET_OWNER correctly
  vhost-user: enable vhost-user multiple queue

 drivers/net/ixgbe/ixgbe_rxtx.c|  86 +-
 drivers/net/virtio/virtio_ethdev.c|  12 +-
 examples/vhost/main.c | 420 +-
 examples/vhost/main.h |   3 +-
 lib/librte_ether/rte_ethdev.c |  11 +
 lib/librte_vhost/rte_vhost_version.map|   7 +
 lib/librte_vhost/rte_virtio_net.h |  30 +-
 lib/librte_vhost/vhost_rxtx.c |  56 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.c  |  27 +-
 lib/librte_vhost/vhost_user/vhost-net-user.h  |   4 +
 lib/librte_vhost/vhost_user/virtio-net-user.c |  79 +++--
 lib/librte_vhost/vhost_user/virtio-net-user.h |  10 +
 lib/librte_vhost/virtio-net.c | 158 +++---
 13 files changed, 659 insertions(+), 244 deletions(-)

-- 
1.9.0



[dpdk-dev] [PATCH v5 01/12] vhost-user: add protocol features support

2015-09-18 Thread Yuanhan Liu
The two protocol features messages are introduced by the qemu vhost
maintainer (Michael) for extending the vhost-user interface. Here is
an excerpt from the vhost-user spec:

Any protocol extensions are gated by protocol feature bits,
which allows full backwards compatibility on both master
and slave.

The vhost-user multiple queue features will be treated as a vhost-user
extension, hence, we have to implement the two messages first.

VHOST_USER_PROTOCOL_FEATURES is initialized to 0, as we don't support
any yet.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_virtio_net.h |  1 +
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 13 -
 lib/librte_vhost/vhost_user/vhost-net-user.h  |  2 ++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 13 +
 lib/librte_vhost/vhost_user/virtio-net-user.h |  5 +
 lib/librte_vhost/virtio-net.c |  5 -
 6 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index a037c15..e3a21e5 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -99,6 +99,7 @@ struct virtio_net {
struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];/**< Contains 
all virtqueue information. */
struct virtio_memory*mem;   /**< QEMU memory and memory 
region information. */
uint64_tfeatures;   /**< Negotiated feature set. */
+   uint64_tprotocol_features;  /**< Negotiated 
protocol feature set. */
uint64_tdevice_fh;  /**< device identifier. */
uint32_tflags;  /**< Device flags. Only used to 
check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index d1f8877..bc2ad24 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -95,7 +95,9 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
-   [VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
+   [VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
+   [VHOST_USER_GET_PROTOCOL_FEATURES]  = 
"VHOST_USER_GET_PROTOCOL_FEATURES",
+   [VHOST_USER_SET_PROTOCOL_FEATURES]  = 
"VHOST_USER_SET_PROTOCOL_FEATURES",
 };

 /**
@@ -363,6 +365,15 @@ vserver_message_handler(int connfd, void *dat, int *remove)
ops->set_features(ctx, &features);
break;

+   case VHOST_USER_GET_PROTOCOL_FEATURES:
+   msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+   msg.size = sizeof(msg.payload.u64);
+   send_vhost_message(connfd, &msg);
+   break;
+   case VHOST_USER_SET_PROTOCOL_FEATURES:
+   user_set_protocol_features(ctx, msg.payload.u64);
+   break;
+
case VHOST_USER_SET_OWNER:
ops->set_owner(ctx);
break;
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 2e72f3c..4490d23 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -63,6 +63,8 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
+   VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+   VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_MAX
 } VhostUserRequest;

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 4689927..360254e 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -316,3 +316,16 @@ user_destroy_device(struct vhost_device_ctx ctx)
dev->mem = NULL;
}
 }
+
+void
+user_set_protocol_features(struct vhost_device_ctx ctx,
+  uint64_t protocol_features)
+{
+   struct virtio_net *dev;
+
+   dev = get_device(ctx);
+   if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
+   return;
+
+   dev->protocol_features = protocol_features;
+}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h 
b/lib/librte_vhost/vhost_user/virtio-net-user.h
index df24860..e7a6ff4 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -37,12 +37,17 @@
 #include "vhost-net.h"
 #include "vhost-net-user.h"

+#define VHOST_USER_PROTOCOL_FEATURES   0ULL
+
 int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);

 void user_set_vring

[dpdk-dev] [PATCH v5 02/12] vhost-user: add VHOST_USER_GET_QUEUE_NUM message

2015-09-18 Thread Yuanhan Liu
To tell the frontend (qemu) how many queue pairs we support.

And it is initiated to VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c | 7 +++
 lib/librte_vhost/vhost_user/vhost-net-user.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index bc2ad24..8675cd4 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -98,6 +98,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
[VHOST_USER_GET_PROTOCOL_FEATURES]  = 
"VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES]  = 
"VHOST_USER_SET_PROTOCOL_FEATURES",
+   [VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
 };

 /**
@@ -421,6 +422,12 @@ vserver_message_handler(int connfd, void *dat, int *remove)
RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
break;

+   case VHOST_USER_GET_QUEUE_NUM:
+   msg.payload.u64 = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
+   msg.size = sizeof(msg.payload.u64);
+   send_vhost_message(connfd, &msg);
+   break;
+
default:
break;

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 4490d23..389d21d 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -65,6 +65,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+   VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_MAX
 } VhostUserRequest;

-- 
1.9.0



[dpdk-dev] [PATCH v5 03/12] vhost: vring queue setup for multiple queue support

2015-09-18 Thread Yuanhan Liu
All queue pairs, including the default (the first) queue pair,
are allocated dynamically, when a vring_call message is received
for the first time for a specific queue pair.

This is a refactor work for enabling vhost-user multiple queue;
it should not break anything as it does no functional changes:
we don't support mq set, so there is only one mq at max.

This patch is based on Changchun's patch.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_virtio_net.h |   3 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c |  44 +-
 lib/librte_vhost/virtio-net.c | 121 --
 3 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index e3a21e5..5dd6493 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -96,7 +96,7 @@ struct vhost_virtqueue {
  * Device structure contains all configuration information relating to the 
device.
  */
 struct virtio_net {
-   struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];/**< Contains 
all virtqueue information. */
+   struct vhost_virtqueue  *virtqueue[VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX];
/**< Contains all virtqueue information. */
struct virtio_memory*mem;   /**< QEMU memory and memory 
region information. */
uint64_tfeatures;   /**< Negotiated feature set. */
uint64_tprotocol_features;  /**< Negotiated 
protocol feature set. */
@@ -104,6 +104,7 @@ struct virtio_net {
uint32_tflags;  /**< Device flags. Only used to 
check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
charifname[IF_NAME_SZ]; /**< Name of the tap 
device or socket path. */
+   uint32_tvirt_qp_nb; /**< number of queue pair we 
have allocated */
void*priv;  /**< private context */
 } __rte_cache_aligned;

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 360254e..e83d279 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -206,25 +206,33 @@ err_mmap:
 }

 static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+   return vq && vq->desc   &&
+  vq->kickfd != -1 &&
+  vq->callfd != -1;
+}
+
+static int
 virtio_is_ready(struct virtio_net *dev)
 {
struct vhost_virtqueue *rvq, *tvq;
+   uint32_t i;

-   /* mq support in future.*/
-   rvq = dev->virtqueue[VIRTIO_RXQ];
-   tvq = dev->virtqueue[VIRTIO_TXQ];
-   if (rvq && tvq && rvq->desc && tvq->desc &&
-   (rvq->kickfd != -1) &&
-   (rvq->callfd != -1) &&
-   (tvq->kickfd != -1) &&
-   (tvq->callfd != -1)) {
-   RTE_LOG(INFO, VHOST_CONFIG,
-   "virtio is now ready for processing.\n");
-   return 1;
+   for (i = 0; i < dev->virt_qp_nb; i++) {
+   rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+   tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
+
+   if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
+   RTE_LOG(INFO, VHOST_CONFIG,
+   "virtio is not ready for processing.\n");
+   return 0;
+   }
}
+
RTE_LOG(INFO, VHOST_CONFIG,
-   "virtio isn't ready for processing.\n");
-   return 0;
+   "virtio is now ready for processing.\n");
+   return 1;
 }

 void
@@ -290,13 +298,9 @@ user_get_vring_base(struct vhost_device_ctx ctx,
 * sent and only sent in vhost_vring_stop.
 * TODO: cleanup the vring, it isn't usable since here.
 */
-   if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
-   close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-   dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
-   }
-   if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
-   close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
-   dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
+   if ((dev->virtqueue[state->index]->kickfd) >= 0) {
+   close(dev->virtqueue[state->index]->kickfd);
+   dev->virtqueue[state->index]->kickfd = -1;
}

return 0;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index deac6b9..643a92e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #ifdef RTE_LIBRTE_VHOST_NUMA
@@ -178,6 +179,15 @@ add_config_ll_entry(struct virtio_net_config_ll 
*new_ll_dev)

 }

+static void
+cleanup_vq(struct vhost_virtqueue *vq)
+{
+   if (vq->callfd >= 0)
+   close(vq->callfd);
+   if (

[dpdk-dev] [PATCH v5 04/12] vhost: rxtx: prepare work for multiple queue support

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

Do not use VIRTIO_RXQ or VIRTIO_TXQ anymore; use the queue_id,
instead, which will be set to a proper value for a specific queue
when we have multiple queue support enabled.

For now, queue_id is still set with VIRTIO_RXQ or VIRTIO_TXQ,
so it should not break anything.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 46 ++-
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index b2b2bcc..a4ab6ca 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -42,6 +42,16 @@

 #define MAX_PKT_BURST 32

+static inline int __attribute__((always_inline))
+is_valid_virt_queue_idx(uint32_t virtq_idx, int is_tx, uint32_t max_qp_idx)
+{
+   if ((is_tx ^ (virtq_idx & 0x1)) ||
+   (virtq_idx >= max_qp_idx * VIRTIO_QNUM))
+   return 0;
+
+   return 1;
+}
+
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
@@ -68,12 +78,14 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
uint8_t success = 0;

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
-   if (unlikely(queue_id != VIRTIO_RXQ)) {
-   LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+   __func__, dev->device_fh, queue_id);
return 0;
}

-   vq = dev->virtqueue[VIRTIO_RXQ];
+   vq = dev->virtqueue[queue_id];
count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

/*
@@ -235,8 +247,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 }

 static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
-   uint16_t res_end_idx, struct rte_mbuf *pkt)
+copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t queue_id,
+   uint16_t res_base_idx, uint16_t res_end_idx,
+   struct rte_mbuf *pkt)
 {
uint32_t vec_idx = 0;
uint32_t entry_success = 0;
@@ -264,7 +277,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t 
res_base_idx,
 * Convert from gpa to vva
 * (guest physical addr -> vhost virtual addr)
 */
-   vq = dev->virtqueue[VIRTIO_RXQ];
+   vq = dev->virtqueue[queue_id];
vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
vb_hdr_addr = vb_addr;

@@ -464,11 +477,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
dev->device_fh);
-   if (unlikely(queue_id != VIRTIO_RXQ)) {
-   LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+   __func__, dev->device_fh, queue_id);
+   return 0;
}

-   vq = dev->virtqueue[VIRTIO_RXQ];
+   vq = dev->virtqueue[queue_id];
count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

if (count == 0)
@@ -509,8 +525,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
res_cur_idx);
} while (success == 0);

-   entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-   res_cur_idx, pkts[pkt_idx]);
+   entry_success = copy_from_mbuf_to_vring(dev, queue_id,
+   res_base_idx, res_cur_idx, pkts[pkt_idx]);

rte_compiler_barrier();

@@ -559,12 +575,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;

-   if (unlikely(queue_id != VIRTIO_TXQ)) {
-   LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+   __func__, dev->device_fh, queue_id);
return 0;
}

-   vq = dev->virtqueue[VIRTIO_TXQ];
+   vq = dev->virtqueue[queue_id];
avail_idx =  *((volatile uint16_t *)&vq->avail->idx);

/* If there are no available buffers then return. */
-- 
1.9.0



[dpdk-dev] [PATCH v5 05/12] vhost: add VHOST_USER_SET_VRING_ENABLE message

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

This message is used to enable/disable a specific vring queue pair.
The first queue pair is enabled by default.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_virtio_net.h |  1 +
 lib/librte_vhost/vhost_rxtx.c | 10 ++
 lib/librte_vhost/vhost_user/vhost-net-user.c  |  5 +
 lib/librte_vhost/vhost_user/vhost-net-user.h  |  1 +
 lib/librte_vhost/vhost_user/virtio-net-user.c | 22 ++
 lib/librte_vhost/vhost_user/virtio-net-user.h |  3 +++
 lib/librte_vhost/virtio-net.c | 12 +---
 7 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 5dd6493..08b69df 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -89,6 +89,7 @@ struct vhost_virtqueue {
volatile uint16_t   last_used_idx_res;  /**< Used for multiple 
devices reserving buffers. */
int callfd; /**< Used to notify the 
guest (trigger interrupt). */
int kickfd; /**< Currently unused 
as polling mode is enabled. */
+   int enabled;
struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
 } __rte_cache_aligned;

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index a4ab6ca..aa9ccda 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -86,6 +86,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
}

vq = dev->virtqueue[queue_id];
+   if (unlikely(vq->enabled == 0))
+   return 0;
+
count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

/*
@@ -278,6 +281,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
 * (guest physical addr -> vhost virtual addr)
 */
vq = dev->virtqueue[queue_id];
+
vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
vb_hdr_addr = vb_addr;

@@ -485,6 +489,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
}

vq = dev->virtqueue[queue_id];
+   if (unlikely(vq->enabled == 0))
+   return 0;
+
count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

if (count == 0)
@@ -583,6 +590,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
}

vq = dev->virtqueue[queue_id];
+   if (unlikely(vq->enabled == 0))
+   return 0;
+
avail_idx =  *((volatile uint16_t *)&vq->avail->idx);

/* If there are no available buffers then return. */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 8675cd4..f681676 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -99,6 +99,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_GET_PROTOCOL_FEATURES]  = 
"VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES]  = 
"VHOST_USER_SET_PROTOCOL_FEATURES",
[VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
+   [VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
 };

 /**
@@ -428,6 +429,10 @@ vserver_message_handler(int connfd, void *dat, int *remove)
send_vhost_message(connfd, &msg);
break;

+   case VHOST_USER_SET_VRING_ENABLE:
+   user_set_vring_enable(ctx, &msg.payload.state);
+   break;
+
default:
break;

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 389d21d..38637cc 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -66,6 +66,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
+   VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_MAX
 } VhostUserRequest;

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index e83d279..9871f20 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -306,6 +306,28 @@ user_get_vring_base(struct vhost_device_ctx ctx,
return 0;
 }

+/*
+ * when virtio queues are ready to work, qemu will send us to
+ * enable the virtio queue pair.
+ */
+int
+user_set_vring_enable(struct vhost_device_ctx ctx,
+ struct vhost_vring_state *state)
+{
+   struct virtio_net *dev = get_device(ctx);
+   uint32_t base_idx = state->index;
+   int enabled = (int)state->num;
+
+   RTE_LOG(INFO, VHOST_CONFIG,
+   "set queue enable: %d to qp idx: %d\n",

[dpdk-dev] [PATCH v5 06/12] vhost-user: handle VHOST_USER_RESET_OWNER correctly

2015-09-18 Thread Yuanhan Liu
Destroy corresponding device when a VHOST_USER_RESET_OWNER message is
received, otherwise, the vhost-switch would still try to access vq
of that device, which results to SIGSEG fault, and let vhost-switch
crash in the end.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f681676..8fad385 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -380,7 +380,7 @@ vserver_message_handler(int connfd, void *dat, int *remove)
ops->set_owner(ctx);
break;
case VHOST_USER_RESET_OWNER:
-   ops->reset_owner(ctx);
+   user_destroy_device(ctx);
break;

case VHOST_USER_SET_MEM_TABLE:
-- 
1.9.0



[dpdk-dev] [PATCH v5 07/12] virtio: resolve for control queue

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

Fix the max virtio queue pair read issue.

Control queue can't work for vhost-user multiple queue mode,
so introduce a counter to avoid the dead loop when polling
the control queue.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_ethdev.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..b2f4120 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1162,7 +1162,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
struct virtio_hw *hw = eth_dev->data->dev_private;
struct virtio_net_config *config;
struct virtio_net_config local_config;
-   uint32_t offset_conf = sizeof(config->mac);
struct rte_pci_device *pci_dev;

RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
@@ -1222,7 +1221,9 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
config = &local_config;

if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-   offset_conf += sizeof(config->status);
+   vtpci_read_dev_config(hw,
+   offsetof(struct virtio_net_config, status),
+   &config->status, sizeof(config->status));
} else {
PMD_INIT_LOG(DEBUG,
 "VIRTIO_NET_F_STATUS is not supported");
@@ -1230,15 +1231,16 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
}

if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
-   offset_conf += sizeof(config->max_virtqueue_pairs);
+   vtpci_read_dev_config(hw,
+   offsetof(struct virtio_net_config, 
max_virtqueue_pairs),
+   &config->max_virtqueue_pairs,
+   sizeof(config->max_virtqueue_pairs));
} else {
PMD_INIT_LOG(DEBUG,
 "VIRTIO_NET_F_MQ is not supported");
config->max_virtqueue_pairs = 1;
}

-   vtpci_read_dev_config(hw, 0, (uint8_t *)config, offset_conf);
-
hw->max_rx_queues =
(VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
-- 
1.9.0



[dpdk-dev] [PATCH v5 08/12] vhost-user: enable vhost-user multiple queue

2015-09-18 Thread Yuanhan Liu
By setting VHOST_USER_PROTOCOL_F_MQ protocol feature bit, and
VIRTIO_NET_F_MQ feature bit.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_user/virtio-net-user.h | 4 +++-
 lib/librte_vhost/virtio-net.c | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h 
b/lib/librte_vhost/vhost_user/virtio-net-user.h
index d46057e..b82108d 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -37,7 +37,9 @@
 #include "vhost-net.h"
 #include "vhost-net-user.h"

-#define VHOST_USER_PROTOCOL_FEATURES   0ULL
+#define VHOST_USER_PROTOCOL_F_MQ   0
+
+#define VHOST_USER_PROTOCOL_FEATURES   (1ULL << VHOST_USER_PROTOCOL_F_MQ)

 int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 5fe1ad6..49840b5 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -74,6 +74,7 @@ static struct virtio_net_config_ll *ll_root;
 #define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
(1ULL << VIRTIO_NET_F_CTRL_RX) | \
+   (1ULL << VIRTIO_NET_F_MQ)  | \
(1ULL << VHOST_F_LOG_ALL)  | \
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
 static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
-- 
1.9.0



[dpdk-dev] [PATCH v5 09/12] vhost: add API bind a virtq to a specific core

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

The new API rte_vhost_core_id_set() is to bind a virtq to a specific
core, while the another API rte_vhost_core_id_get() is for getting
the bind core for a virtq.

The usage, which will be introduced soon, could be found at examples/vhost/main.c.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_vhost_version.map |  7 +++
 lib/librte_vhost/rte_virtio_net.h  | 25 +
 lib/librte_vhost/virtio-net.c  | 25 +
 3 files changed, 57 insertions(+)

diff --git a/lib/librte_vhost/rte_vhost_version.map 
b/lib/librte_vhost/rte_vhost_version.map
index 3d8709e..2ce141c 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -18,5 +18,12 @@ DPDK_2.1 {
global:

rte_vhost_driver_unregister;
+} DPDK_2.0;
+
+
+DPDK_2.2 {
+   global:

+   rte_vhost_core_id_get;
+   rte_vhost_core_id_set;
 } DPDK_2.0;
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 08b69df..7785729 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -90,6 +90,7 @@ struct vhost_virtqueue {
int callfd; /**< Used to notify the 
guest (trigger interrupt). */
int kickfd; /**< Currently unused 
as polling mode is enabled. */
int enabled;
+   uint32_tcore_id;/**< Data core that the 
vq is attached to */
struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
 } __rte_cache_aligned;

@@ -238,4 +239,28 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, 
uint16_t queue_id,
 uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);

+/**
+ * This function get the data core id for queue pair in one vhost device.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @return
+ *  core id of queue pair of specified virtio device.
+ */
+uint16_t rte_vhost_core_id_get(volatile struct virtio_net *dev,
+  uint16_t queue_id);
+
+/**
+ * This function set the data core id for queue pair in one vhost device.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @param core_id
+ *  data core id for virtio queue pair in mq case
+ */
+void rte_vhost_core_id_set(struct virtio_net *dev, uint16_t queue_id,
+  uint16_t core_id);
+
 #endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 49840b5..33bdacd 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -867,6 +867,31 @@ int rte_vhost_feature_enable(uint64_t feature_mask)
return -1;
 }

+uint16_t
+rte_vhost_core_id_get(volatile struct virtio_net *dev, uint16_t queue_id)
+{
+   if (dev == NULL)
+   return 0;
+
+   if (dev->virtqueue == NULL || dev->virtqueue[queue_id] == NULL)
+   return 0;
+
+   return dev->virtqueue[queue_id]->core_id;
+}
+
+void
+rte_vhost_core_id_set(struct virtio_net *dev, uint16_t queue_id,
+ uint16_t core_id)
+{
+   if (dev == NULL)
+   return;
+
+   if (dev->virtqueue == NULL || dev->virtqueue[queue_id] == NULL)
+   return;
+
+   dev->virtqueue[queue_id]->core_id = core_id;
+}
+
 /*
  * Register ops so that we can add/remove device to data core.
  */
-- 
1.9.0



[dpdk-dev] [PATCH v5 10/12] ixgbe: support VMDq RSS in non-SRIOV environment

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

In non-SRIOV environment, VMDq RSS could be enabled by MRQC register.
In theory, the queue number per pool could be 2 or 4, but only 2 queues
are available due to HW limitation, the same limit also exists in Linux
ixgbe driver.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 86 +++---
 lib/librte_ether/rte_ethdev.c  | 11 ++
 2 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index a598a72..e502fe8 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -3445,16 +3445,16 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev)
return;
 }

-/*
- * VMDq only support for 10 GbE NIC.
+/**
+ * Config pool for VMDq on 10 GbE NIC.
  */
 static void
-ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
+ixgbe_vmdq_pool_configure(struct rte_eth_dev *dev)
 {
struct rte_eth_vmdq_rx_conf *cfg;
struct ixgbe_hw *hw;
enum rte_eth_nb_pools num_pools;
-   uint32_t mrqc, vt_ctl, vlanctrl;
+   uint32_t vt_ctl, vlanctrl;
uint32_t vmolr = 0;
int i;

@@ -3463,12 +3463,6 @@ ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
num_pools = cfg->nb_queue_pools;

-   ixgbe_rss_disable(dev);
-
-   /* MRQC: enable vmdq */
-   mrqc = IXGBE_MRQC_VMDQEN;
-   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
-
/* PFVTCTL: turn on virtualisation and set the default pool */
vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
if (cfg->enable_default_pool)
@@ -3534,7 +3528,29 @@ ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
IXGBE_WRITE_FLUSH(hw);
 }

-/*
+/**
+ * VMDq only support for 10 GbE NIC.
+ */
+static void
+ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
+{
+   struct ixgbe_hw *hw;
+   uint32_t mrqc;
+
+   PMD_INIT_FUNC_TRACE();
+   hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   ixgbe_rss_disable(dev);
+
+   /* MRQC: enable vmdq */
+   mrqc = IXGBE_MRQC_VMDQEN;
+   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+   IXGBE_WRITE_FLUSH(hw);
+
+   ixgbe_vmdq_pool_configure(dev);
+}
+
+/**
  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
  * @hw: pointer to hardware structure
  */
@@ -3639,6 +3655,41 @@ ixgbe_config_vf_rss(struct rte_eth_dev *dev)
 }

 static int
+ixgbe_config_vmdq_rss(struct rte_eth_dev *dev)
+{
+   struct ixgbe_hw *hw;
+   uint32_t mrqc;
+
+   ixgbe_rss_configure(dev);
+
+   hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   /* MRQC: enable VMDQ RSS */
+   mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+   mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+
+   switch (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
+   case 2:
+   mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+   break;
+
+   case 4:
+   mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+   break;
+
+   default:
+   PMD_INIT_LOG(ERR, "Invalid pool number in non-IOV mode with 
VMDQ RSS");
+   return -EINVAL;
+   }
+
+   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+   ixgbe_vmdq_pool_configure(dev);
+
+   return 0;
+}
+
+static int
 ixgbe_config_vf_default(struct rte_eth_dev *dev)
 {
struct ixgbe_hw *hw =
@@ -3694,6 +3745,10 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
ixgbe_vmdq_rx_hw_configure(dev);
break;

+   case ETH_MQ_RX_VMDQ_RSS:
+   ixgbe_config_vmdq_rss(dev);
+   break;
+
case ETH_MQ_RX_NONE:
/* if mq_mode is none, disable rss mode.*/
default: ixgbe_rss_disable(dev);
@@ -4186,6 +4241,8 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)

/* Setup RX queues */
for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   uint32_t psrtype = 0;
+
rxq = dev->data->rx_queues[i];

/*
@@ -4213,12 +4270,10 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
if (rx_conf->header_split) {
if (hw->mac.type == ixgbe_mac_82599EB) {
/* Must setup the PSRTYPE register */
-   uint32_t psrtype;
psrtype = IXGBE_PSRTYPE_TCPHDR |
IXGBE_PSRTYPE_UDPHDR   |
IXGBE_PSRTYPE_IPV4HDR  |
IXGBE_PSRTYPE_IPV6HDR;
-   IXGBE_WRITE_REG(hw, 
IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
}
srrctl = ((rx_conf->split_hdr_size <<
IXGBE_SRRCTL_BSIZEH

[dpdk-dev] [PATCH v5 11/12] examples/vhost: demonstrate the usage of vhost mq feature

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

This patch demonstrates the usage of vhost mq feature, by leveraging
the VMDq+RSS HW feature to receive packets and distribute them into
different queue in the pool according to 5 tuples.

Queue number is specified by the --rxq option.

The HW queue number in each pool is exactly the same as the queue number in
the virtio device, e.g. rxq = 4 means 4 HW queues in
each VMDq pool, and 4 queues in each virtio device/port, one mapping to
each.

=
==|   |==|
   vport0 |   |  vport1  |
---  ---  ---  ---|   |---  ---  ---  ---|
q0 | q1 | q2 | q3 |   |q0 | q1 | q2 | q3 |
/\= =/\= =/\= =/\=|   |/\= =/\= =/\= =/\=|
||   ||   ||   ||  ||   ||   ||   ||
||   ||   ||   ||  ||   ||   ||   ||
||= =||= =||= =||=|   =||== ||== ||== ||=|
q0 | q1 | q2 | q3 |   |q0 | q1 | q2 | q3 |

--|   |--|
 VMDq pool0   |   |VMDq pool1|
==|   |==|

On the RX side, it first polls each queue of the pool, gets the
packets from it, and enqueues them into the corresponding queue in the
virtio device/port.  On the TX side, it dequeues packets from each queue
of the virtio device/port and sends them to either a physical port or
another virtio device according to their destination MAC address.

We bind the virtq to a specific core by rte_vhost_core_id_set(),
and later we can retrieve it by rte_vhost_core_id_get().

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 examples/vhost/main.c | 325 ++
 examples/vhost/main.h |   3 +-
 2 files changed, 225 insertions(+), 103 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9eac2d0..23b7aa7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -163,6 +163,9 @@ static int mergeable;
 /* Do vlan strip on host, enabled on default */
 static uint32_t vlan_strip = 1;

+/* Rx queue number per virtio device */
+static uint32_t rxq = 1;
+
 /* number of descriptors to apply*/
 static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;
 static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP;
@@ -365,6 +368,37 @@ validate_num_devices(uint32_t max_nb_devices)
return 0;
 }

+static int
+get_dev_nb_for_82599(struct rte_eth_dev_info dev_info)
+{
+   int dev_nb = -1;
+   switch (rxq) {
+   case 1:
+   case 2:
+   /*
+* for 82599, dev_info.max_vmdq_pools always 64 dispite rx mode.
+*/
+   dev_nb = (int)dev_info.max_vmdq_pools;
+   break;
+   case 4:
+   dev_nb = (int)dev_info.max_vmdq_pools / 2;
+   break;
+   default:
+   RTE_LOG(ERR, VHOST_CONFIG, "invalid rxq for VMDq.\n");
+   }
+   return dev_nb;
+}
+
+static int
+get_dev_nb_for_fvl(struct rte_eth_dev_info dev_info)
+{
+   /*
+* for FVL, dev_info.max_vmdq_pools is calculated according to
+* the configured value: CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM.
+*/
+   return (int)dev_info.max_vmdq_pools;
+}
+
 /*
  * Initialises a given port using global settings and with the rx buffers
  * coming from the mbuf_pool passed as parameter
@@ -380,6 +414,7 @@ port_init(uint8_t port)
uint16_t rx_ring_size, tx_ring_size;
int retval;
uint16_t q;
+   struct rte_eth_dev *eth_dev;

/* The max pool number from dev_info will be used to validate the pool 
number specified in cmd line */
rte_eth_dev_info_get (port, &dev_info);
@@ -408,8 +443,16 @@ port_init(uint8_t port)
txconf->tx_deferred_start = 1;
}

-   /*configure the number of supported virtio devices based on VMDQ limits 
*/
-   num_devices = dev_info.max_vmdq_pools;
+   /* Configure the virtio devices num based on VMDQ limits */
+   if (dev_info.max_vmdq_pools == ETH_64_POOLS) {
+   num_devices = (uint32_t)get_dev_nb_for_82599(dev_info);
+   if (num_devices == (uint32_t)-1)
+   return -1;
+   } else {
+   num_devices = (uint32_t)get_dev_nb_for_fvl(dev_info);
+   if (num_devices == (uint32_t)-1)
+   return -1;
+   }

if (zero_copy) {
rx_ring_size = num_rx_descriptor;
@@ -431,7 +474,7 @@ port_init(uint8_t port)
return retval;
/* NIC queues are divided into pf queues and vmdq queues.  */
num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
-   queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
+   queues_per_pool = dev_info.vmdq_queue_num / num_devices;
num_vmdq_queues = num_devices * queues_per_pool;
num_queues = num_pf_queues + num_vmdq_queues;
vmdq_queue_base = dev_info.vmdq_queue_base;
@@ -447,6 +490,14 @@ port_init(uint8_t port)
if (retval != 0)
   

[dpdk-dev] [PATCH v5 12/12] examples/vhost: add per queue stats

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 examples/vhost/main.c | 97 +--
 1 file changed, 56 insertions(+), 41 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 23b7aa7..06a3ac7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -314,7 +314,7 @@ struct ipv4_hdr {
 #define VLAN_ETH_HLEN   18

 /* Per-device statistics struct */
-struct device_statistics {
+struct qp_statistics {
uint64_t tx_total;
rte_atomic64_t rx_total_atomic;
uint64_t rx_total;
@@ -322,6 +322,10 @@ struct device_statistics {
rte_atomic64_t rx_atomic;
uint64_t rx;
 } __rte_cache_aligned;
+
+struct device_statistics {
+   struct qp_statistics *qp_stats;
+};
 struct device_statistics dev_statistics[MAX_DEVICES];

 /*
@@ -775,6 +779,17 @@ us_vhost_parse_args(int argc, char **argv)
return -1;
} else {
enable_stats = ret;
+   if (enable_stats)
+   for (i = 0; i < MAX_DEVICES; 
i++) {
+   
dev_statistics[i].qp_stats =
+   
malloc(VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct qp_statistics));
+   if 
(dev_statistics[i].qp_stats == NULL) {
+   RTE_LOG(ERR, 
VHOST_CONFIG, "Failed to allocate memory for qp stats.\n");
+   return -1;
+   }
+   
memset(dev_statistics[i].qp_stats, 0,
+   
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct qp_statistics));
+   }
}
}

@@ -1131,13 +1146,13 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf 
*m, uint32_t qp_idx)
&m, 1);
if (enable_stats) {
rte_atomic64_add(
-   
&dev_statistics[tdev->device_fh].rx_total_atomic,
+   
&dev_statistics[tdev->device_fh].qp_stats[qp_idx].rx_total_atomic,
1);
rte_atomic64_add(
-   
&dev_statistics[tdev->device_fh].rx_atomic,
+   
&dev_statistics[tdev->device_fh].qp_stats[qp_idx].rx_atomic,
ret);
-   
dev_statistics[tdev->device_fh].tx_total++;
-   dev_statistics[tdev->device_fh].tx += 
ret;
+   
dev_statistics[dev->device_fh].qp_stats[qp_idx].tx_total++;
+   
dev_statistics[dev->device_fh].qp_stats[qp_idx].tx += ret;
}
}

@@ -1271,8 +1286,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf 
*m,
tx_q->m_table[len] = m;
len++;
if (enable_stats) {
-   dev_statistics[dev->device_fh].tx_total++;
-   dev_statistics[dev->device_fh].tx++;
+   dev_statistics[dev->device_fh].qp_stats[qp_idx].tx_total++;
+   dev_statistics[dev->device_fh].qp_stats[qp_idx].tx++;
}

if (unlikely(len == MAX_PKT_BURST)) {
@@ -1403,10 +1418,10 @@ switch_worker(__attribute__((unused)) void *arg)

pkts_burst, rx_count);
if (enable_stats) {
rte_atomic64_add(
-   
&dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
+   
&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[qp_idx].rx_total_atomic,
rx_count);
rte_atomic64_add(
-   
&dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);
+   
&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[qp_idx].rx_atomic, 
ret_count);
}
while (likely(rx_count)) {
rx_count--;
@@ -1954,8 +1969,8 @@ virtio_tx_route_zcp(st

[dpdk-dev] [PATCH v5 00/12] vhost-user multiple queues enabling

2015-09-18 Thread Yuanhan Liu
Sorry that I typed the wrong email address for Changchun; I will resend them.
Sorry for the noise.

--yliu

On Fri, Sep 18, 2015 at 11:01:01PM +0800, Yuanhan Liu wrote:
> This patch set enables vhost-user multiple queues.
> 
> Overview
> 
> 
> It depends on some QEMU patches that, hopefully, will be merged soon.
> Those qemu patches introduce some new vhost-user messages, for vhost-user
> mq enabling negotiation. Here is the main negotiation steps (Qemu
> as master, and DPDK vhost-user as slave):
> 
> - Master queries features by VHOST_USER_GET_FEATURES from slave
> 
> - Check if VHOST_USER_F_PROTOCOL_FEATURES exist. If not, mq is not
>   supported. (check patch 1 for why VHOST_USER_F_PROTOCOL_FEATURES
>   is introduced)
> 
> - Master then sends another command, VHOST_USER_GET_QUEUE_NUM, for
>   querying how many queues the slave supports.
> 
>   Master will compare the result with the requested queue number.
>   Qemu exits if the former is smaller.
> 
> - Master then tries to initiate all queue pairs by sending some vhost
>   user commands, including VHOST_USER_SET_VRING_CALL, which will
>   trigger the slave to do related vring setup, such as vring allocation.
> 
> 
> Till now, all necessary initiation and negotiation are done. And master
> could send another message, VHOST_USER_SET_VRING_ENABLE, to enable/disable
> a specific queue dynamically later.
> 
> 
> Patchset
> 
> 
> Patches 1-7 are all preparation work for enabling mq; they are all atomic
> changes, which are designed to not break anything.
> 
> Patch 8 actually enables the mq feature, by setting two key feature flags.
> 
> Patches 9-12 are for demonstrating the mq feature.
> 
> 
> Testing
> ===
> 
> Host side
> --
> 
> - # Start vhost-switch
> 
>   sudo mount -t hugetlbfs nodev /mnt/huge
>   sudo modprobe uio
>   sudo insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko
> 
>   sudo $RTE_SDK/tools/dpdk_nic_bind.py --bind igb_uio :08:00.0
> 
>   sudo $RTE_SDK/examples/vhost/build/vhost-switch -c 0xf0 -n 4 \
>--huge-dir /mnt/huge --socket-mem 2048,0 -- -p 1 --vm2vm 0  \
>--dev-basename usvhost --rxq 2
> 
>   # Above common generates a usvhost socket file at PWD. You could also
>   # specify "--stats 1" option to enable stats dumping.
> 
> 
> 
> - # start qemu
> 
> 
>   sudo sudo mount -t hugetlbfs nodev $HOME/hugetlbfs
>   $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 -machine accel=kvm -m 4G \
> -object 
> memory-backend-file,id=mem,size=4G,mem-path=$HOME/hugetlbfs,share=on \
>   -numa node,memdev=mem -chardev socket,id=chr0,path=/path/to/usvhost \
>   -netdev vhost-user,id=net0,chardev=chr0,vhostforce,queues=2 \
>   -device 
> virtio-net-pci,netdev=net0,mq=on,vectors=6,mac=52:54:00:12:34:58,csum=off,gso=off,guest_tso4=off,guest_tso6=off,guest_ecn=off
>  \
>   -hda $HOME/iso/fc-22-x86_64.img -smp 10 -cpu 
> core2duo,+sse3,+sse4.1,+sse4.2
> 
> 
> Guest side
> --
> 
>modprobe uio
>insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko
>echo 1024 > 
> /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
>./tools/dpdk_nic_bind.py --bind igb_uio 00:03.0
> 
>$RTE_SDK/$RTE_TARGET/app/testpmd -c 1f -n 4 -- --rxq=2 --txq=2 \
> --nb-cores=4 -i --disable-hw-vlan --txqflags 0xf00
>  
>> set fwd mac
>> start tx_first
>  
> 
> After those setups, you then could use packet generator for packet tx/rx 
> testing.
> 
> ---
> Changchun Ouyang (7):
>   vhost: rxtx: prepare work for multiple queue support
>   vhost: add VHOST_USER_SET_VRING_ENABLE message
>   virtio: resolve for control queue
>   vhost: add API bind a virtq to a specific core
>   ixgbe: support VMDq RSS in non-SRIOV environment
>   examples/vhost: demonstrate the usage of vhost mq feature
>   examples/vhost: add per queue stats
> 
> Yuanhan Liu (5):
>   vhost-user: add protocol features support
>   vhost-user: add VHOST_USER_GET_QUEUE_NUM message
>   vhost: vring queue setup for multiple queue support
>   vhost-user: handle VHOST_USER_RESET_OWNER correctly
>   vhost-user: enable vhost-user multiple queue
> 
>  drivers/net/ixgbe/ixgbe_rxtx.c|  86 +-
>  drivers/net/virtio/virtio_ethdev.c|  12 +-
>  examples/vhost/main.c | 420 
> +-
>  examples/vhost/main.h |   3 +-
>  lib/librte_ether/rte_ethdev.c |  11 +
>  lib/librte_vhost/rte_vhost_version.map|   7 +
>  lib/librte_vhost/rte_virtio_net.h |  30 +-
>  lib/librte_vhost/vhost_rxtx.c |  56 +++-
>  lib/librte_vhost/vhost_user/vhost-net-user.c  |  27 +-
>  lib/librte_vhost/vhost_user/vhost-net-user.h  |   4 +
>  lib/librte_vhost/vhost_user/virtio-net-user.c |  79 +++--
>  lib/librte_vhost/vhost_user/virtio-net-user.h |  10 +
>  lib/librte_vhost/virtio-net.c | 158 +++---
>  13 files changed, 659 insertions(+), 244 deletions(-)
> 
> -- 
> 1.9.0


[dpdk-dev] [PATCH v5 resend 00/12] vhost-user multiple queues enabling

2015-09-18 Thread Yuanhan Liu
This patch set enables vhost-user multiple queues.

Overview


It depends on some QEMU patches that, hopefully, will be merged soon.
Those qemu patches introduce some new vhost-user messages, for vhost-user
mq enabling negotiation. Here is the main negotiation steps (Qemu
as master, and DPDK vhost-user as slave):

- Master queries features by VHOST_USER_GET_FEATURES from slave

- Check if VHOST_USER_F_PROTOCOL_FEATURES exist. If not, mq is not
  supported. (check patch 1 for why VHOST_USER_F_PROTOCOL_FEATURES
  is introduced)

- Master then sends another command, VHOST_USER_GET_QUEUE_NUM, for
  querying how many queues the slave supports.

  Master will compare the result with the requested queue number.
  Qemu exits if the former is smaller.

- Master then tries to initiate all queue pairs by sending some vhost
  user commands, including VHOST_USER_SET_VRING_CALL, which will
  trigger the slave to do related vring setup, such as vring allocation.


Till now, all necessary initiation and negotiation are done. And master
could send another message, VHOST_USER_SET_VRING_ENABLE, to enable/disable
a specific queue dynamically later.


Patchset


Patches 1-7 are all preparation work for enabling mq; they are all atomic
changes, which are designed to not break anything.

Patch 8 actually enables the mq feature, by setting two key feature flags.

Patches 9-12 are for demonstrating the mq feature.


Testing
===

Host side
--

- # Start vhost-switch

  sudo mount -t hugetlbfs nodev /mnt/huge
  sudo modprobe uio
  sudo insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko

  sudo $RTE_SDK/tools/dpdk_nic_bind.py --bind igb_uio :08:00.0

  sudo $RTE_SDK/examples/vhost/build/vhost-switch -c 0xf0 -n 4 \
   --huge-dir /mnt/huge --socket-mem 2048,0 -- -p 1 --vm2vm 0  \
   --dev-basename usvhost --rxq 2

  # Above common generates a usvhost socket file at PWD. You could also
  # specify "--stats 1" option to enable stats dumping.



- # start qemu


  sudo sudo mount -t hugetlbfs nodev $HOME/hugetlbfs
  $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 -machine accel=kvm -m 4G \
-object 
memory-backend-file,id=mem,size=4G,mem-path=$HOME/hugetlbfs,share=on \
-numa node,memdev=mem -chardev socket,id=chr0,path=/path/to/usvhost \
-netdev vhost-user,id=net0,chardev=chr0,vhostforce,queues=2 \
-device 
virtio-net-pci,netdev=net0,mq=on,vectors=6,mac=52:54:00:12:34:58,csum=off,gso=off,guest_tso4=off,guest_tso6=off,guest_ecn=off
 \
-hda $HOME/iso/fc-22-x86_64.img -smp 10 -cpu 
core2duo,+sse3,+sse4.1,+sse4.2


Guest side
--

   modprobe uio
   insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko
   echo 1024 > 
/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
   ./tools/dpdk_nic_bind.py --bind igb_uio 00:03.0

   $RTE_SDK/$RTE_TARGET/app/testpmd -c 1f -n 4 -- --rxq=2 --txq=2 \
--nb-cores=4 -i --disable-hw-vlan --txqflags 0xf00

   > set fwd mac
   > start tx_first


After those setups, you then could use packet generator for packet tx/rx 
testing.

---
Changchun Ouyang (7):
  vhost: rxtx: prepare work for multiple queue support
  vhost: add VHOST_USER_SET_VRING_ENABLE message
  virtio: resolve for control queue
  vhost: add API bind a virtq to a specific core
  ixgbe: support VMDq RSS in non-SRIOV environment
  examples/vhost: demonstrate the usage of vhost mq feature
  examples/vhost: add per queue stats

Yuanhan Liu (5):
  vhost-user: add protocol features support
  vhost-user: add VHOST_USER_GET_QUEUE_NUM message
  vhost: vring queue setup for multiple queue support
  vhost-user: handle VHOST_USER_RESET_OWNER correctly
  vhost-user: enable vhost-user multiple queue

 drivers/net/ixgbe/ixgbe_rxtx.c|  86 +-
 drivers/net/virtio/virtio_ethdev.c|  12 +-
 examples/vhost/main.c | 420 +-
 examples/vhost/main.h |   3 +-
 lib/librte_ether/rte_ethdev.c |  11 +
 lib/librte_vhost/rte_vhost_version.map|   7 +
 lib/librte_vhost/rte_virtio_net.h |  30 +-
 lib/librte_vhost/vhost_rxtx.c |  56 +++-
 lib/librte_vhost/vhost_user/vhost-net-user.c  |  27 +-
 lib/librte_vhost/vhost_user/vhost-net-user.h  |   4 +
 lib/librte_vhost/vhost_user/virtio-net-user.c |  79 +++--
 lib/librte_vhost/vhost_user/virtio-net-user.h |  10 +
 lib/librte_vhost/virtio-net.c | 158 +++---
 13 files changed, 659 insertions(+), 244 deletions(-)

-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 01/12] vhost-user: add protocol features support

2015-09-18 Thread Yuanhan Liu
The two protocol features messages are introduced by the qemu vhost
maintainer (Michael) for extending the vhost-user interface. Here is
an excerpt from the vhost-user spec:

Any protocol extensions are gated by protocol feature bits,
which allows full backwards compatibility on both master
and slave.

The vhost-user multiple queue features will be treated as a vhost-user
extension, hence, we have to implement the two messages first.

VHOST_USER_PROTOCOL_FEATURES is initialized to 0, as we don't support
any yet.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_virtio_net.h |  1 +
 lib/librte_vhost/vhost_user/vhost-net-user.c  | 13 -
 lib/librte_vhost/vhost_user/vhost-net-user.h  |  2 ++
 lib/librte_vhost/vhost_user/virtio-net-user.c | 13 +
 lib/librte_vhost/vhost_user/virtio-net-user.h |  5 +
 lib/librte_vhost/virtio-net.c |  5 -
 6 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index a037c15..e3a21e5 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -99,6 +99,7 @@ struct virtio_net {
struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];/**< Contains 
all virtqueue information. */
struct virtio_memory*mem;   /**< QEMU memory and memory 
region information. */
uint64_tfeatures;   /**< Negotiated feature set. */
+   uint64_tprotocol_features;  /**< Negotiated 
protocol feature set. */
uint64_tdevice_fh;  /**< device identifier. */
uint32_tflags;  /**< Device flags. Only used to 
check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index d1f8877..bc2ad24 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -95,7 +95,9 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
-   [VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
+   [VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
+   [VHOST_USER_GET_PROTOCOL_FEATURES]  = 
"VHOST_USER_GET_PROTOCOL_FEATURES",
+   [VHOST_USER_SET_PROTOCOL_FEATURES]  = 
"VHOST_USER_SET_PROTOCOL_FEATURES",
 };

 /**
@@ -363,6 +365,15 @@ vserver_message_handler(int connfd, void *dat, int *remove)
ops->set_features(ctx, &features);
break;

+   case VHOST_USER_GET_PROTOCOL_FEATURES:
+   msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+   msg.size = sizeof(msg.payload.u64);
+   send_vhost_message(connfd, &msg);
+   break;
+   case VHOST_USER_SET_PROTOCOL_FEATURES:
+   user_set_protocol_features(ctx, msg.payload.u64);
+   break;
+
case VHOST_USER_SET_OWNER:
ops->set_owner(ctx);
break;
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 2e72f3c..4490d23 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -63,6 +63,8 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
+   VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+   VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_MAX
 } VhostUserRequest;

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 4689927..360254e 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -316,3 +316,16 @@ user_destroy_device(struct vhost_device_ctx ctx)
dev->mem = NULL;
}
 }
+
+void
+user_set_protocol_features(struct vhost_device_ctx ctx,
+  uint64_t protocol_features)
+{
+   struct virtio_net *dev;
+
+   dev = get_device(ctx);
+   if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
+   return;
+
+   dev->protocol_features = protocol_features;
+}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h 
b/lib/librte_vhost/vhost_user/virtio-net-user.h
index df24860..e7a6ff4 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -37,12 +37,17 @@
 #include "vhost-net.h"
 #include "vhost-net-user.h"

+#define VHOST_USER_PROTOCOL_FEATURES   0ULL
+
 int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);

 void user_set_vring

[dpdk-dev] [PATCH v5 resend 02/12] vhost-user: add VHOST_USER_GET_QUEUE_NUM message

2015-09-18 Thread Yuanhan Liu
To tell the frontend (qemu) how many queue pairs we support.

And it is initialized to VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c | 7 +++
 lib/librte_vhost/vhost_user/vhost-net-user.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index bc2ad24..8675cd4 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -98,6 +98,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
[VHOST_USER_GET_PROTOCOL_FEATURES]  = 
"VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES]  = 
"VHOST_USER_SET_PROTOCOL_FEATURES",
+   [VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
 };

 /**
@@ -421,6 +422,12 @@ vserver_message_handler(int connfd, void *dat, int *remove)
RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
break;

+   case VHOST_USER_GET_QUEUE_NUM:
+   msg.payload.u64 = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
+   msg.size = sizeof(msg.payload.u64);
+   send_vhost_message(connfd, &msg);
+   break;
+
default:
break;

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 4490d23..389d21d 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -65,6 +65,7 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+   VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_MAX
 } VhostUserRequest;

-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 03/12] vhost: vring queue setup for multiple queue support

2015-09-18 Thread Yuanhan Liu
All queue pairs, including the default (the first) queue pair,
are allocated dynamically, when a vring_call message is received
first time for a specific queue pair.

This is refactoring work for enabling vhost-user multiple queue;
it should not break anything as it makes no functional changes:
we don't support mq set, so there is only one mq at max.

This patch is based on Changchun's patch.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_virtio_net.h |   3 +-
 lib/librte_vhost/vhost_user/virtio-net-user.c |  44 +-
 lib/librte_vhost/virtio-net.c | 121 --
 3 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index e3a21e5..5dd6493 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -96,7 +96,7 @@ struct vhost_virtqueue {
  * Device structure contains all configuration information relating to the 
device.
  */
 struct virtio_net {
-   struct vhost_virtqueue  *virtqueue[VIRTIO_QNUM];/**< Contains 
all virtqueue information. */
+   struct vhost_virtqueue  *virtqueue[VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX];
/**< Contains all virtqueue information. */
struct virtio_memory*mem;   /**< QEMU memory and memory 
region information. */
uint64_tfeatures;   /**< Negotiated feature set. */
uint64_tprotocol_features;  /**< Negotiated 
protocol feature set. */
@@ -104,6 +104,7 @@ struct virtio_net {
uint32_tflags;  /**< Device flags. Only used to 
check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
charifname[IF_NAME_SZ]; /**< Name of the tap 
device or socket path. */
+   uint32_tvirt_qp_nb; /**< number of queue pair we 
have allocated */
void*priv;  /**< private context */
 } __rte_cache_aligned;

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 360254e..e83d279 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -206,25 +206,33 @@ err_mmap:
 }

 static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+   return vq && vq->desc   &&
+  vq->kickfd != -1 &&
+  vq->callfd != -1;
+}
+
+static int
 virtio_is_ready(struct virtio_net *dev)
 {
struct vhost_virtqueue *rvq, *tvq;
+   uint32_t i;

-   /* mq support in future.*/
-   rvq = dev->virtqueue[VIRTIO_RXQ];
-   tvq = dev->virtqueue[VIRTIO_TXQ];
-   if (rvq && tvq && rvq->desc && tvq->desc &&
-   (rvq->kickfd != -1) &&
-   (rvq->callfd != -1) &&
-   (tvq->kickfd != -1) &&
-   (tvq->callfd != -1)) {
-   RTE_LOG(INFO, VHOST_CONFIG,
-   "virtio is now ready for processing.\n");
-   return 1;
+   for (i = 0; i < dev->virt_qp_nb; i++) {
+   rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+   tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
+
+   if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
+   RTE_LOG(INFO, VHOST_CONFIG,
+   "virtio is not ready for processing.\n");
+   return 0;
+   }
}
+
RTE_LOG(INFO, VHOST_CONFIG,
-   "virtio isn't ready for processing.\n");
-   return 0;
+   "virtio is now ready for processing.\n");
+   return 1;
 }

 void
@@ -290,13 +298,9 @@ user_get_vring_base(struct vhost_device_ctx ctx,
 * sent and only sent in vhost_vring_stop.
 * TODO: cleanup the vring, it isn't usable since here.
 */
-   if ((dev->virtqueue[VIRTIO_RXQ]->kickfd) >= 0) {
-   close(dev->virtqueue[VIRTIO_RXQ]->kickfd);
-   dev->virtqueue[VIRTIO_RXQ]->kickfd = -1;
-   }
-   if ((dev->virtqueue[VIRTIO_TXQ]->kickfd) >= 0) {
-   close(dev->virtqueue[VIRTIO_TXQ]->kickfd);
-   dev->virtqueue[VIRTIO_TXQ]->kickfd = -1;
+   if ((dev->virtqueue[state->index]->kickfd) >= 0) {
+   close(dev->virtqueue[state->index]->kickfd);
+   dev->virtqueue[state->index]->kickfd = -1;
}

return 0;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index deac6b9..643a92e 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #ifdef RTE_LIBRTE_VHOST_NUMA
@@ -178,6 +179,15 @@ add_config_ll_entry(struct virtio_net_config_ll 
*new_ll_dev)

 }

+static void
+cleanup_vq(struct vhost_virtqueue *vq)
+{
+   if (vq->callfd >= 0)
+   close(vq->callfd);
+   if (

[dpdk-dev] [PATCH v5 resend 04/12] vhost: rxtx: prepare work for multiple queue support

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

Do not use VIRTIO_RXQ or VIRTIO_TXQ anymore; use the queue_id,
instead, which will be set to a proper value for a specific queue
when we have multiple queue support enabled.

For now, queue_id is still set with VIRTIO_RXQ or VIRTIO_TXQ,
so it should not break anything.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_rxtx.c | 46 ++-
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index b2b2bcc..a4ab6ca 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -42,6 +42,16 @@

 #define MAX_PKT_BURST 32

+static inline int __attribute__((always_inline))
+is_valid_virt_queue_idx(uint32_t virtq_idx, int is_tx, uint32_t max_qp_idx)
+{
+   if ((is_tx ^ (virtq_idx & 0x1)) ||
+   (virtq_idx >= max_qp_idx * VIRTIO_QNUM))
+   return 0;
+
+   return 1;
+}
+
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
@@ -68,12 +78,14 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
uint8_t success = 0;

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
-   if (unlikely(queue_id != VIRTIO_RXQ)) {
-   LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+   __func__, dev->device_fh, queue_id);
return 0;
}

-   vq = dev->virtqueue[VIRTIO_RXQ];
+   vq = dev->virtqueue[queue_id];
count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

/*
@@ -235,8 +247,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 }

 static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
-   uint16_t res_end_idx, struct rte_mbuf *pkt)
+copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t queue_id,
+   uint16_t res_base_idx, uint16_t res_end_idx,
+   struct rte_mbuf *pkt)
 {
uint32_t vec_idx = 0;
uint32_t entry_success = 0;
@@ -264,7 +277,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t 
res_base_idx,
 * Convert from gpa to vva
 * (guest physical addr -> vhost virtual addr)
 */
-   vq = dev->virtqueue[VIRTIO_RXQ];
+   vq = dev->virtqueue[queue_id];
vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
vb_hdr_addr = vb_addr;

@@ -464,11 +477,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,

LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
dev->device_fh);
-   if (unlikely(queue_id != VIRTIO_RXQ)) {
-   LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+   __func__, dev->device_fh, queue_id);
+   return 0;
}

-   vq = dev->virtqueue[VIRTIO_RXQ];
+   vq = dev->virtqueue[queue_id];
count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

if (count == 0)
@@ -509,8 +525,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
res_cur_idx);
} while (success == 0);

-   entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-   res_cur_idx, pkts[pkt_idx]);
+   entry_success = copy_from_mbuf_to_vring(dev, queue_id,
+   res_base_idx, res_cur_idx, pkts[pkt_idx]);

rte_compiler_barrier();

@@ -559,12 +575,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
uint16_t free_entries, entry_success = 0;
uint16_t avail_idx;

-   if (unlikely(queue_id != VIRTIO_TXQ)) {
-   LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
+   if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
+   RTE_LOG(ERR, VHOST_DATA,
+   "%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+   __func__, dev->device_fh, queue_id);
return 0;
}

-   vq = dev->virtqueue[VIRTIO_TXQ];
+   vq = dev->virtqueue[queue_id];
avail_idx =  *((volatile uint16_t *)&vq->avail->idx);

/* If there are no available buffers then return. */
-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 05/12] vhost: add VHOST_USER_SET_VRING_ENABLE message

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

This message is used to enable/disable a specific vring queue pair.
The first queue pair is enabled by default.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_virtio_net.h |  1 +
 lib/librte_vhost/vhost_rxtx.c | 10 ++
 lib/librte_vhost/vhost_user/vhost-net-user.c  |  5 +
 lib/librte_vhost/vhost_user/vhost-net-user.h  |  1 +
 lib/librte_vhost/vhost_user/virtio-net-user.c | 22 ++
 lib/librte_vhost/vhost_user/virtio-net-user.h |  3 +++
 lib/librte_vhost/virtio-net.c | 12 +---
 7 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 5dd6493..08b69df 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -89,6 +89,7 @@ struct vhost_virtqueue {
volatile uint16_t   last_used_idx_res;  /**< Used for multiple 
devices reserving buffers. */
int callfd; /**< Used to notify the 
guest (trigger interrupt). */
int kickfd; /**< Currently unused 
as polling mode is enabled. */
+   int enabled;
struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
 } __rte_cache_aligned;

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index a4ab6ca..aa9ccda 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -86,6 +86,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
}

vq = dev->virtqueue[queue_id];
+   if (unlikely(vq->enabled == 0))
+   return 0;
+
count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

/*
@@ -278,6 +281,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t 
queue_id,
 * (guest physical addr -> vhost virtual addr)
 */
vq = dev->virtqueue[queue_id];
+
vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
vb_hdr_addr = vb_addr;

@@ -485,6 +489,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t 
queue_id,
}

vq = dev->virtqueue[queue_id];
+   if (unlikely(vq->enabled == 0))
+   return 0;
+
count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

if (count == 0)
@@ -583,6 +590,9 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
queue_id,
}

vq = dev->virtqueue[queue_id];
+   if (unlikely(vq->enabled == 0))
+   return 0;
+
avail_idx =  *((volatile uint16_t *)&vq->avail->idx);

/* If there are no available buffers then return. */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 8675cd4..f681676 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -99,6 +99,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_GET_PROTOCOL_FEATURES]  = 
"VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES]  = 
"VHOST_USER_SET_PROTOCOL_FEATURES",
[VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
+   [VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
 };

 /**
@@ -428,6 +429,10 @@ vserver_message_handler(int connfd, void *dat, int *remove)
send_vhost_message(connfd, &msg);
break;

+   case VHOST_USER_SET_VRING_ENABLE:
+   user_set_vring_enable(ctx, &msg.payload.state);
+   break;
+
default:
break;

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h 
b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 389d21d..38637cc 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -66,6 +66,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
+   VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_MAX
 } VhostUserRequest;

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c 
b/lib/librte_vhost/vhost_user/virtio-net-user.c
index e83d279..9871f20 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -306,6 +306,28 @@ user_get_vring_base(struct vhost_device_ctx ctx,
return 0;
 }

+/*
+ * when virtio queues are ready to work, qemu will send us to
+ * enable the virtio queue pair.
+ */
+int
+user_set_vring_enable(struct vhost_device_ctx ctx,
+ struct vhost_vring_state *state)
+{
+   struct virtio_net *dev = get_device(ctx);
+   uint32_t base_idx = state->index;
+   int enabled = (int)state->num;
+
+   RTE_LOG(INFO, VHOST_CONFIG,
+   "set queue enable: %d to qp idx: %d\n",

[dpdk-dev] [PATCH v5 resend 06/12] vhost-user: handle VHOST_USER_RESET_OWNER correctly

2015-09-18 Thread Yuanhan Liu
Destroy the corresponding device when a VHOST_USER_RESET_OWNER message is
received; otherwise, the vhost-switch would still try to access the vq
of that device, which results in a SIGSEGV fault and makes vhost-switch
crash in the end.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_user/vhost-net-user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c 
b/lib/librte_vhost/vhost_user/vhost-net-user.c
index f681676..8fad385 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -380,7 +380,7 @@ vserver_message_handler(int connfd, void *dat, int *remove)
ops->set_owner(ctx);
break;
case VHOST_USER_RESET_OWNER:
-   ops->reset_owner(ctx);
+   user_destroy_device(ctx);
break;

case VHOST_USER_SET_MEM_TABLE:
-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 07/12] virtio: resolve for control queue

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

Fix the max virtio queue pair read issue.

Control queue can't work for vhost-user multiple queue mode,
so introduce a counter to avoid the dead loop when polling
the control queue.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 drivers/net/virtio/virtio_ethdev.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 465d3cd..b2f4120 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1162,7 +1162,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
struct virtio_hw *hw = eth_dev->data->dev_private;
struct virtio_net_config *config;
struct virtio_net_config local_config;
-   uint32_t offset_conf = sizeof(config->mac);
struct rte_pci_device *pci_dev;

RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
@@ -1222,7 +1221,9 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
config = &local_config;

if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
-   offset_conf += sizeof(config->status);
+   vtpci_read_dev_config(hw,
+   offsetof(struct virtio_net_config, status),
+   &config->status, sizeof(config->status));
} else {
PMD_INIT_LOG(DEBUG,
 "VIRTIO_NET_F_STATUS is not supported");
@@ -1230,15 +1231,16 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
}

if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
-   offset_conf += sizeof(config->max_virtqueue_pairs);
+   vtpci_read_dev_config(hw,
+   offsetof(struct virtio_net_config, 
max_virtqueue_pairs),
+   &config->max_virtqueue_pairs,
+   sizeof(config->max_virtqueue_pairs));
} else {
PMD_INIT_LOG(DEBUG,
 "VIRTIO_NET_F_MQ is not supported");
config->max_virtqueue_pairs = 1;
}

-   vtpci_read_dev_config(hw, 0, (uint8_t *)config, offset_conf);
-
hw->max_rx_queues =
(VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ?
VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs;
-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 08/12] vhost-user: enable vhost-user multiple queue

2015-09-18 Thread Yuanhan Liu
By setting VHOST_USER_PROTOCOL_F_MQ protocol feature bit, and
VIRTIO_NET_F_MQ feature bit.

Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/vhost_user/virtio-net-user.h | 4 +++-
 lib/librte_vhost/virtio-net.c | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h 
b/lib/librte_vhost/vhost_user/virtio-net-user.h
index d46057e..b82108d 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -37,7 +37,9 @@
 #include "vhost-net.h"
 #include "vhost-net-user.h"

-#define VHOST_USER_PROTOCOL_FEATURES   0ULL
+#define VHOST_USER_PROTOCOL_F_MQ   0
+
+#define VHOST_USER_PROTOCOL_FEATURES   (1ULL << VHOST_USER_PROTOCOL_F_MQ)

 int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);

diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 5fe1ad6..49840b5 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -74,6 +74,7 @@ static struct virtio_net_config_ll *ll_root;
 #define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
(1ULL << VIRTIO_NET_F_CTRL_RX) | \
+   (1ULL << VIRTIO_NET_F_MQ)  | \
(1ULL << VHOST_F_LOG_ALL)  | \
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
 static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 09/12] vhost: add API bind a virtq to a specific core

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

The new API rte_vhost_core_id_set() binds a virtq to a specific
core, while the other API rte_vhost_core_id_get() gets
the bound core for a virtq.

The usage, which will be introduced soon, can be found at examples/vhost/main.c.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 lib/librte_vhost/rte_vhost_version.map |  7 +++
 lib/librte_vhost/rte_virtio_net.h  | 25 +
 lib/librte_vhost/virtio-net.c  | 25 +
 3 files changed, 57 insertions(+)

diff --git a/lib/librte_vhost/rte_vhost_version.map 
b/lib/librte_vhost/rte_vhost_version.map
index 3d8709e..2ce141c 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -18,5 +18,12 @@ DPDK_2.1 {
global:

rte_vhost_driver_unregister;
+} DPDK_2.0;
+
+
+DPDK_2.2 {
+   global:

+   rte_vhost_core_id_get;
+   rte_vhost_core_id_set;
 } DPDK_2.0;
diff --git a/lib/librte_vhost/rte_virtio_net.h 
b/lib/librte_vhost/rte_virtio_net.h
index 08b69df..7785729 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -90,6 +90,7 @@ struct vhost_virtqueue {
int callfd; /**< Used to notify the 
guest (trigger interrupt). */
int kickfd; /**< Currently unused 
as polling mode is enabled. */
int enabled;
+   uint32_tcore_id;/**< Data core that the 
vq is attached to */
struct buf_vector   buf_vec[BUF_VECTOR_MAX];/**< for 
scatter RX. */
 } __rte_cache_aligned;

@@ -238,4 +239,28 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, 
uint16_t queue_id,
 uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);

+/**
+ * This function get the data core id for queue pair in one vhost device.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @return
+ *  core id of queue pair of specified virtio device.
+ */
+uint16_t rte_vhost_core_id_get(volatile struct virtio_net *dev,
+  uint16_t queue_id);
+
+/**
+ * This function set the data core id for queue pair in one vhost device.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @param core_id
+ *  data core id for virtio queue pair in mq case
+ */
+void rte_vhost_core_id_set(struct virtio_net *dev, uint16_t queue_id,
+  uint16_t core_id);
+
 #endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 49840b5..33bdacd 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -867,6 +867,31 @@ int rte_vhost_feature_enable(uint64_t feature_mask)
return -1;
 }

+uint16_t
+rte_vhost_core_id_get(volatile struct virtio_net *dev, uint16_t queue_id)
+{
+   if (dev == NULL)
+   return 0;
+
+   if (dev->virtqueue == NULL || dev->virtqueue[queue_id] == NULL)
+   return 0;
+
+   return dev->virtqueue[queue_id]->core_id;
+}
+
+void
+rte_vhost_core_id_set(struct virtio_net *dev, uint16_t queue_id,
+ uint16_t core_id)
+{
+   if (dev == NULL)
+   return;
+
+   if (dev->virtqueue == NULL || dev->virtqueue[queue_id] == NULL)
+   return;
+
+   dev->virtqueue[queue_id]->core_id = core_id;
+}
+
 /*
  * Register ops so that we can add/remove device to data core.
  */
-- 
1.9.0



[dpdk-dev] [PATCH v5 resend 10/12] ixgbe: support VMDq RSS in non-SRIOV environment

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

In non-SRIOV environment, VMDq RSS could be enabled by MRQC register.
In theory, the queue number per pool could be 2 or 4, but only 2 queues
are available due to HW limitation, the same limit also exists in Linux
ixgbe driver.

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 86 +++---
 lib/librte_ether/rte_ethdev.c  | 11 ++
 2 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index a598a72..e502fe8 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -3445,16 +3445,16 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev)
return;
 }

-/*
- * VMDq only support for 10 GbE NIC.
+/**
+ * Config pool for VMDq on 10 GbE NIC.
  */
 static void
-ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
+ixgbe_vmdq_pool_configure(struct rte_eth_dev *dev)
 {
struct rte_eth_vmdq_rx_conf *cfg;
struct ixgbe_hw *hw;
enum rte_eth_nb_pools num_pools;
-   uint32_t mrqc, vt_ctl, vlanctrl;
+   uint32_t vt_ctl, vlanctrl;
uint32_t vmolr = 0;
int i;

@@ -3463,12 +3463,6 @@ ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
num_pools = cfg->nb_queue_pools;

-   ixgbe_rss_disable(dev);
-
-   /* MRQC: enable vmdq */
-   mrqc = IXGBE_MRQC_VMDQEN;
-   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
-
/* PFVTCTL: turn on virtualisation and set the default pool */
vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
if (cfg->enable_default_pool)
@@ -3534,7 +3528,29 @@ ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
IXGBE_WRITE_FLUSH(hw);
 }

-/*
+/**
+ * VMDq only support for 10 GbE NIC.
+ */
+static void
+ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
+{
+   struct ixgbe_hw *hw;
+   uint32_t mrqc;
+
+   PMD_INIT_FUNC_TRACE();
+   hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   ixgbe_rss_disable(dev);
+
+   /* MRQC: enable vmdq */
+   mrqc = IXGBE_MRQC_VMDQEN;
+   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+   IXGBE_WRITE_FLUSH(hw);
+
+   ixgbe_vmdq_pool_configure(dev);
+}
+
+/**
  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
  * @hw: pointer to hardware structure
  */
@@ -3639,6 +3655,41 @@ ixgbe_config_vf_rss(struct rte_eth_dev *dev)
 }

 static int
+ixgbe_config_vmdq_rss(struct rte_eth_dev *dev)
+{
+   struct ixgbe_hw *hw;
+   uint32_t mrqc;
+
+   ixgbe_rss_configure(dev);
+
+   hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   /* MRQC: enable VMDQ RSS */
+   mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+   mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+
+   switch (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) {
+   case 2:
+   mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+   break;
+
+   case 4:
+   mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+   break;
+
+   default:
+   PMD_INIT_LOG(ERR, "Invalid pool number in non-IOV mode with 
VMDQ RSS");
+   return -EINVAL;
+   }
+
+   IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+   ixgbe_vmdq_pool_configure(dev);
+
+   return 0;
+}
+
+static int
 ixgbe_config_vf_default(struct rte_eth_dev *dev)
 {
struct ixgbe_hw *hw =
@@ -3694,6 +3745,10 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
ixgbe_vmdq_rx_hw_configure(dev);
break;

+   case ETH_MQ_RX_VMDQ_RSS:
+   ixgbe_config_vmdq_rss(dev);
+   break;
+
case ETH_MQ_RX_NONE:
/* if mq_mode is none, disable rss mode.*/
default: ixgbe_rss_disable(dev);
@@ -4186,6 +4241,8 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)

/* Setup RX queues */
for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   uint32_t psrtype = 0;
+
rxq = dev->data->rx_queues[i];

/*
@@ -4213,12 +4270,10 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
if (rx_conf->header_split) {
if (hw->mac.type == ixgbe_mac_82599EB) {
/* Must setup the PSRTYPE register */
-   uint32_t psrtype;
psrtype = IXGBE_PSRTYPE_TCPHDR |
IXGBE_PSRTYPE_UDPHDR   |
IXGBE_PSRTYPE_IPV4HDR  |
IXGBE_PSRTYPE_IPV6HDR;
-   IXGBE_WRITE_REG(hw, 
IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
}
srrctl = ((rx_conf->split_hdr_size <<
IXGBE_SRRCTL_BSIZEH

[dpdk-dev] [PATCH v5 resend 11/12] examples/vhost: demonstrate the usage of vhost mq feature

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

This patch demonstrates the usage of vhost mq feature, by leveraging
the VMDq+RSS HW feature to receive packets and distribute them into
different queue in the pool according to 5 tuples.

Queue number is specified by the --rxq option.

The HW queue number per pool is exactly the same as the queue number in the
virtio device, e.g. rxq = 4 means the queue number is 4: 4 HW queues in
each VMDq pool, and 4 queues in each virtio device/port, one mapping to
each.

=
==|   |==|
   vport0 |   |  vport1  |
---  ---  ---  ---|   |---  ---  ---  ---|
q0 | q1 | q2 | q3 |   |q0 | q1 | q2 | q3 |
/\= =/\= =/\= =/\=|   |/\= =/\= =/\= =/\=|
||   ||   ||   ||  ||   ||   ||   ||
||   ||   ||   ||  ||   ||   ||   ||
||= =||= =||= =||=|   =||== ||== ||== ||=|
q0 | q1 | q2 | q3 |   |q0 | q1 | q2 | q3 |

--|   |--|
 VMDq pool0   |   |VMDq pool1|
==|   |==|

On the RX side, it first polls each queue of the pool, gets the
packets from it and enqueues them into the corresponding queue of the
virtio device/port.  On the TX side, it dequeues packets from each queue
of the virtio device/port and sends them to either a physical port or
another virtio device according to their destination MAC address.

We bind the virtq to a specific core by rte_vhost_core_id_set(),
and later we can retrieve it by rte_vhost_core_id_get().

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 examples/vhost/main.c | 325 ++
 examples/vhost/main.h |   3 +-
 2 files changed, 225 insertions(+), 103 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9eac2d0..23b7aa7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -163,6 +163,9 @@ static int mergeable;
 /* Do vlan strip on host, enabled on default */
 static uint32_t vlan_strip = 1;

+/* Rx queue number per virtio device */
+static uint32_t rxq = 1;
+
 /* number of descriptors to apply*/
 static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;
 static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP;
@@ -365,6 +368,37 @@ validate_num_devices(uint32_t max_nb_devices)
return 0;
 }

+static int
+get_dev_nb_for_82599(struct rte_eth_dev_info dev_info)
+{
+   int dev_nb = -1;
+   switch (rxq) {
+   case 1:
+   case 2:
+   /*
+* for 82599, dev_info.max_vmdq_pools always 64 dispite rx mode.
+*/
+   dev_nb = (int)dev_info.max_vmdq_pools;
+   break;
+   case 4:
+   dev_nb = (int)dev_info.max_vmdq_pools / 2;
+   break;
+   default:
+   RTE_LOG(ERR, VHOST_CONFIG, "invalid rxq for VMDq.\n");
+   }
+   return dev_nb;
+}
+
+static int
+get_dev_nb_for_fvl(struct rte_eth_dev_info dev_info)
+{
+   /*
+* for FVL, dev_info.max_vmdq_pools is calculated according to
+* the configured value: CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM.
+*/
+   return (int)dev_info.max_vmdq_pools;
+}
+
 /*
  * Initialises a given port using global settings and with the rx buffers
  * coming from the mbuf_pool passed as parameter
@@ -380,6 +414,7 @@ port_init(uint8_t port)
uint16_t rx_ring_size, tx_ring_size;
int retval;
uint16_t q;
+   struct rte_eth_dev *eth_dev;

/* The max pool number from dev_info will be used to validate the pool 
number specified in cmd line */
rte_eth_dev_info_get (port, &dev_info);
@@ -408,8 +443,16 @@ port_init(uint8_t port)
txconf->tx_deferred_start = 1;
}

-   /*configure the number of supported virtio devices based on VMDQ limits 
*/
-   num_devices = dev_info.max_vmdq_pools;
+   /* Configure the virtio devices num based on VMDQ limits */
+   if (dev_info.max_vmdq_pools == ETH_64_POOLS) {
+   num_devices = (uint32_t)get_dev_nb_for_82599(dev_info);
+   if (num_devices == (uint32_t)-1)
+   return -1;
+   } else {
+   num_devices = (uint32_t)get_dev_nb_for_fvl(dev_info);
+   if (num_devices == (uint32_t)-1)
+   return -1;
+   }

if (zero_copy) {
rx_ring_size = num_rx_descriptor;
@@ -431,7 +474,7 @@ port_init(uint8_t port)
return retval;
/* NIC queues are divided into pf queues and vmdq queues.  */
num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
-   queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
+   queues_per_pool = dev_info.vmdq_queue_num / num_devices;
num_vmdq_queues = num_devices * queues_per_pool;
num_queues = num_pf_queues + num_vmdq_queues;
vmdq_queue_base = dev_info.vmdq_queue_base;
@@ -447,6 +490,14 @@ port_init(uint8_t port)
if (retval != 0)
   

[dpdk-dev] [PATCH v5 resend 12/12] examples/vhost: add per queue stats

2015-09-18 Thread Yuanhan Liu
From: Changchun Ouyang 

Signed-off-by: Changchun Ouyang 
Signed-off-by: Yuanhan Liu 
---
 examples/vhost/main.c | 97 +--
 1 file changed, 56 insertions(+), 41 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 23b7aa7..06a3ac7 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -314,7 +314,7 @@ struct ipv4_hdr {
 #define VLAN_ETH_HLEN   18

 /* Per-device statistics struct */
-struct device_statistics {
+struct qp_statistics {
uint64_t tx_total;
rte_atomic64_t rx_total_atomic;
uint64_t rx_total;
@@ -322,6 +322,10 @@ struct device_statistics {
rte_atomic64_t rx_atomic;
uint64_t rx;
 } __rte_cache_aligned;
+
+struct device_statistics {
+   struct qp_statistics *qp_stats;
+};
 struct device_statistics dev_statistics[MAX_DEVICES];

 /*
@@ -775,6 +779,17 @@ us_vhost_parse_args(int argc, char **argv)
return -1;
} else {
enable_stats = ret;
+   if (enable_stats)
+   for (i = 0; i < MAX_DEVICES; 
i++) {
+   
dev_statistics[i].qp_stats =
+   
malloc(VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct qp_statistics));
+   if 
(dev_statistics[i].qp_stats == NULL) {
+   RTE_LOG(ERR, 
VHOST_CONFIG, "Failed to allocate memory for qp stats.\n");
+   return -1;
+   }
+   
memset(dev_statistics[i].qp_stats, 0,
+   
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX * sizeof(struct qp_statistics));
+   }
}
}

@@ -1131,13 +1146,13 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf 
*m, uint32_t qp_idx)
&m, 1);
if (enable_stats) {
rte_atomic64_add(
-   
&dev_statistics[tdev->device_fh].rx_total_atomic,
+   
&dev_statistics[tdev->device_fh].qp_stats[qp_idx].rx_total_atomic,
1);
rte_atomic64_add(
-   
&dev_statistics[tdev->device_fh].rx_atomic,
+   
&dev_statistics[tdev->device_fh].qp_stats[qp_idx].rx_atomic,
ret);
-   
dev_statistics[tdev->device_fh].tx_total++;
-   dev_statistics[tdev->device_fh].tx += 
ret;
+   
dev_statistics[dev->device_fh].qp_stats[qp_idx].tx_total++;
+   
dev_statistics[dev->device_fh].qp_stats[qp_idx].tx += ret;
}
}

@@ -1271,8 +1286,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf 
*m,
tx_q->m_table[len] = m;
len++;
if (enable_stats) {
-   dev_statistics[dev->device_fh].tx_total++;
-   dev_statistics[dev->device_fh].tx++;
+   dev_statistics[dev->device_fh].qp_stats[qp_idx].tx_total++;
+   dev_statistics[dev->device_fh].qp_stats[qp_idx].tx++;
}

if (unlikely(len == MAX_PKT_BURST)) {
@@ -1403,10 +1418,10 @@ switch_worker(__attribute__((unused)) void *arg)

pkts_burst, rx_count);
if (enable_stats) {
rte_atomic64_add(
-   
&dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
+   
&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[qp_idx].rx_total_atomic,
rx_count);
rte_atomic64_add(
-   
&dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);
+   
&dev_statistics[dev_ll->vdev->dev->device_fh].qp_stats[qp_idx].rx_atomic, 
ret_count);
}
while (likely(rx_count)) {
rx_count--;
@@ -1954,8 +1969,8 @@ virtio_tx_route_zcp(st

[dpdk-dev] ixgbe vPMD question

2015-09-18 Thread Bruce Richardson
On Thu, Sep 17, 2015 at 09:28:31PM +0100, Zoltan Kiss wrote:
> Hi,
> 
> The recv function does a prefetch on cacheline1, however it seems to me that
> rx_pkts[pos] should be uninitialized pointer at that time:
> 
> http://dpdk.org/browse/dpdk/tree/drivers/net/ixgbe/ixgbe_rxtx_vec.c#n287
> 
> So I guess it prefetches only random value. Or am I missing something?
> 
> Regards,
> 
> Zoltan

>From a look at the code, you may indeed be right here. We'll see about fixing
that up in a future patch.

/Bruce



[dpdk-dev] [PATCH] lib: rte_*_create gives NULL/EEXIST on duped name

2015-09-18 Thread Yoni Fogel
Also fixed a bug in many of them where if the rte_malloc of
the TAILQ fails, then we return a pointer to some arbitrary
existing struct.
---
 lib/librte_acl/rte_acl.c  | 53 +--
 lib/librte_hash/rte_cuckoo_hash.c |  6 +++--
 lib/librte_hash/rte_fbk_hash.c|  5 +++-
 lib/librte_lpm/rte_lpm.c  |  5 +++-
 lib/librte_lpm/rte_lpm6.c |  5 +++-
 5 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index d60219f..f591556 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -213,37 +213,40 @@ rte_acl_create(const struct rte_acl_param *param)
break;
}

+   ctx = NULL;
+   if (te != NULL) {
+   rte_errno = EEXIST;
+   goto exit;
+   }
+
/* if ACL with such name doesn't exist, then create a new one. */
-   if (te == NULL) {
-   ctx = NULL;
-   te = rte_zmalloc("ACL_TAILQ_ENTRY", sizeof(*te), 0);
+   te = rte_zmalloc("ACL_TAILQ_ENTRY", sizeof(*te), 0);

-   if (te == NULL) {
-   RTE_LOG(ERR, ACL, "Cannot allocate tailq entry!\n");
-   goto exit;
-   }
+   if (te == NULL) {
+   RTE_LOG(ERR, ACL, "Cannot allocate tailq entry!\n");
+   goto exit;
+   }

-   ctx = rte_zmalloc_socket(name, sz, RTE_CACHE_LINE_SIZE, 
param->socket_id);
+   ctx = rte_zmalloc_socket(name, sz, RTE_CACHE_LINE_SIZE, 
param->socket_id);

-   if (ctx == NULL) {
-   RTE_LOG(ERR, ACL,
-   "allocation of %zu bytes on socket %d for %s 
failed\n",
-   sz, param->socket_id, name);
-   rte_free(te);
-   goto exit;
-   }
-   /* init new allocated context. */
-   ctx->rules = ctx + 1;
-   ctx->max_rules = param->max_rule_num;
-   ctx->rule_sz = param->rule_size;
-   ctx->socket_id = param->socket_id;
-   ctx->alg = rte_acl_default_classify;
-   snprintf(ctx->name, sizeof(ctx->name), "%s", param->name);
+   if (ctx == NULL) {
+   RTE_LOG(ERR, ACL,
+   "allocation of %zu bytes on socket %d for %s failed\n",
+   sz, param->socket_id, name);
+   rte_free(te);
+   goto exit;
+   }
+   /* init new allocated context. */
+   ctx->rules = ctx + 1;
+   ctx->max_rules = param->max_rule_num;
+   ctx->rule_sz = param->rule_size;
+   ctx->socket_id = param->socket_id;
+   ctx->alg = rte_acl_default_classify;
+   snprintf(ctx->name, sizeof(ctx->name), "%s", param->name);

-   te->data = (void *) ctx;
+   te->data = (void *) ctx;

-   TAILQ_INSERT_TAIL(acl_list, te, next);
-   }
+   TAILQ_INSERT_TAIL(acl_list, te, next);

 exit:
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
diff --git a/lib/librte_hash/rte_cuckoo_hash.c 
b/lib/librte_hash/rte_cuckoo_hash.c
index 7019763..fe5a79e 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -206,8 +206,10 @@ rte_hash_create(const struct rte_hash_parameters *params)

/* Guarantee there's no existing */
h = rte_hash_find_existing(params->name);
-   if (h != NULL)
-   return h;
+   if (h != NULL) {
+   rte_errno = EEXIST;
+   return NULL;
+   }

te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c
index 8752a47..55c9f35 100644
--- a/lib/librte_hash/rte_fbk_hash.c
+++ b/lib/librte_hash/rte_fbk_hash.c
@@ -140,8 +140,11 @@ rte_fbk_hash_create(const struct rte_fbk_hash_params 
*params)
if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0)
break;
}
-   if (te != NULL)
+   ht = NULL;
+   if (te != NULL) {
+   rte_errno = EEXIST;
goto exit;
+   }

te = rte_zmalloc("FBK_HASH_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 163ba3c..ea3cd44 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -181,8 +181,11 @@ rte_lpm_create(const char *name, int socket_id, int 
max_rules,
if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0)
break;
}
-   if (te != NULL)
+   lpm = NULL;
+   if (te != NULL) {
+   rte_errno = EEXIST;
goto exit;
+   }

/* allocate tailq entry */
te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
index 6c2b293..ff0

[dpdk-dev] [PATCH 0/7] Add hierarchical support to make install

2015-09-18 Thread Mario Carrillo
The DPDK package lacks a mechanism to install libraries, headers,
applications and kernel modules to a file system tree.

This patch set allows files to be installed according to the following
proposal:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html

By adding a parameter H=1 (hierarchy-file) to the makefile system, it is
possible to do the following steps

make config T=TARGET
make
make install H=1

and files will be installed on the proper directory. Also you can use
the DESTDIR variable.

Mario Carrillo (7):
  mk: Add hierarchy-file support
  mk: Add hierarchy-file support (sbin)
  mk: Add hierarchy-file support (doc)
  mk: Add hierarchy-file support (app)
  mk: Add hierarchy-file support (include)
  mk: Add hierarchy-file support (lib)
  mk: Add hierarchy-file support (linux mod)

 mk/internal/rte.install-post.mk | 15 +++-
 mk/rte.app.mk   |  8 +++
 mk/rte.lib.mk   | 16 +
 mk/rte.module.mk|  8 +++
 mk/rte.sdkinstall.mk| 52 +
 5 files changed, 98 insertions(+), 1 deletion(-)

-- 
2.1.0



[dpdk-dev] [PATCH 1/7] mk: Add hierarchy-file support

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK scripts, tools, examples,
makefiles and config files when invoking "make install H=1"
(hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html
and dpdk spec file.

scripts, tools, examples, makefiles and config files will be installed
in: $(DESTDIR)/usr/share/dpdk

Signed-off-by: Mario Carrillo 
---
 mk/rte.sdkinstall.mk | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 86c98a5..3b8169b 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -43,6 +43,23 @@ ifndef T
 T=*
 endif

+# When H (hierarchy-file) variable is equal "1" it will install files
+# according to the next proposal:
+# http://www.freedesktop.org/software/systemd/man/file-hierarchy.html
+# some variables get values from .config in order not to set a target again.
+ifeq ($(H),1)
+include $(BUILD_DIR)/build/.config
+RTE_ARCH := $(CONFIG_RTE_ARCH:"%"=%)
+RTE_MACHINE := $(CONFIG_RTE_MACHINE:"%"=%)
+RTE_EXEC_ENV := $(CONFIG_RTE_EXEC_ENV:"%"=%)
+RTE_TOOLCHAIN := $(CONFIG_RTE_TOOLCHAIN:"%"=%)
+RTE_TARGET := $(RTE_ARCH)-$(RTE_MACHINE)-$(RTE_EXEC_ENV)-$(RTE_TOOLCHAIN)
+T := $(RTE_ARCH)-$(RTE_MACHINE)-$(RTE_EXEC_ENV)-$(RTE_TOOLCHAIN)
+DATA_DIR := $(DESTDIR)/usr/share
+INCLUDE_DIR := $(DESTDIR)/usr/include
+export INCLUDE_DIR
+endif
+
 #
 # install: build sdk for all supported targets
 #
@@ -56,6 +73,22 @@ install: $(INSTALL_TARGETS)

 %_install:
@echo == Installing $*
+#
+# Install config and environment files
+# according to freedesktop hierarchy-file when H=1
+#
+ifeq ($(H),1)
+   $(Q)$(MAKE) all O=$(BUILD_DIR)/build
+   @[ -d $(DATA_DIR)/dpdk ] || mkdir -p $(DATA_DIR)/dpdk
+   $(Q)cp -a $(BUILD_DIR)/mk $(DATA_DIR)/dpdk
+   $(Q)cp -a $(BUILD_DIR)/scripts $(DATA_DIR)/dpdk
+   $(Q)cp -a $(BUILD_DIR)/examples $(DATA_DIR)/dpdk
+   $(Q)cp -a $(BUILD_DIR)/tools $(DATA_DIR)/dpdk
+   @[ -d $(INCLUDE_DIR) ] || mkdir -p $(INCLUDE_DIR)
+   $(Q)cp -f  $(BUILD_DIR)/build/include/rte_config.h $(INCLUDE_DIR)
+   @[ -d $(DATA_DIR)/dpdk/config ] || mkdir -p $(DATA_DIR)/dpdk/config
+   $(Q)cp -f $(BUILD_DIR)/build/.config $(DATA_DIR)/dpdk/config
+else
$(Q)if [ ! -f $(BUILD_DIR)/$*/.config ]; then \
$(MAKE) config T=$* O=$(BUILD_DIR)/$*; \
elif cmp -s $(BUILD_DIR)/$*/.config.orig $(BUILD_DIR)/$*/.config; then \
@@ -79,6 +112,7 @@ install: $(INSTALL_TARGETS)
 UNINSTALL_TARGETS := $(addsuffix _uninstall,\
$(filter-out %~,$(INSTALL_CONFIGS)))

+endif
 .PHONY: uninstall
 uninstall: $(UNINSTALL_TARGETS)

-- 
2.1.0



[dpdk-dev] [PATCH 2/7] mk: Add hierarchy-file support (sbin)

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK bind scripts,
when invoking "make install H=1" (hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html
and dpdk spec file

bind scripts will be installed in:
$(DESTDIR)/usr/sbin/dpdk_nic_bind

Signed-off-by: Mario Carrillo 
---
 mk/rte.sdkinstall.mk | 4 
 1 file changed, 4 insertions(+)

diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 3b8169b..2028a8b 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -57,7 +57,9 @@ RTE_TARGET := 
$(RTE_ARCH)-$(RTE_MACHINE)-$(RTE_EXEC_ENV)-$(RTE_TOOLCHAIN)
 T := $(RTE_ARCH)-$(RTE_MACHINE)-$(RTE_EXEC_ENV)-$(RTE_TOOLCHAIN)
 DATA_DIR := $(DESTDIR)/usr/share
 INCLUDE_DIR := $(DESTDIR)/usr/include
+SBIN_DIR := $(DESTDIR)/usr/sbin
 export INCLUDE_DIR
+export SBIN_DIR
 endif

 #
@@ -88,6 +90,8 @@ ifeq ($(H),1)
$(Q)cp -f  $(BUILD_DIR)/build/include/rte_config.h $(INCLUDE_DIR)
@[ -d $(DATA_DIR)/dpdk/config ] || mkdir -p $(DATA_DIR)/dpdk/config
$(Q)cp -f $(BUILD_DIR)/build/.config $(DATA_DIR)/dpdk/config
+   @[ -d $(SBIN_DIR)/dpdk_nic_bind ] || mkdir -p $(SBIN_DIR)/dpdk_nic_bind
+   $(Q)cp -f $(BUILD_DIR)/tools/*nic_bind.py $(SBIN_DIR)/dpdk_nic_bind
 else
$(Q)if [ ! -f $(BUILD_DIR)/$*/.config ]; then \
$(MAKE) config T=$* O=$(BUILD_DIR)/$*; \
-- 
2.1.0



[dpdk-dev] [PATCH 3/7] mk: Add hierarchy-file support (doc)

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK documentation,
when invoking "make install H=1" (hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html
and dpdk spec file

documentation will be installed in:
$(DESTDIR)/usr/share/doc/dpdk

Signed-off-by: Mario Carrillo 
---
 mk/rte.sdkinstall.mk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 2028a8b..41b8485 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -92,6 +92,8 @@ ifeq ($(H),1)
$(Q)cp -f $(BUILD_DIR)/build/.config $(DATA_DIR)/dpdk/config
@[ -d $(SBIN_DIR)/dpdk_nic_bind ] || mkdir -p $(SBIN_DIR)/dpdk_nic_bind
$(Q)cp -f $(BUILD_DIR)/tools/*nic_bind.py $(SBIN_DIR)/dpdk_nic_bind
+   @[ -d $(DATA_DIR)/doc/dpdk/ ] || mkdir -p $(DATA_DIR)/doc/dpdk/
+   $(Q)cp -a $(BUILD_DIR)/doc/* $(DATA_DIR)/doc/dpdk
 else
$(Q)if [ ! -f $(BUILD_DIR)/$*/.config ]; then \
$(MAKE) config T=$* O=$(BUILD_DIR)/$*; \
-- 
2.1.0



[dpdk-dev] [PATCH 4/7] mk: Add hierarchy-file support (app)

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK app files,
when invoking "make install H=1" (hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html

app files will be installed in: $(DESTDIR)/usr/bin

Signed-off-by: Mario Carrillo 
---
 mk/rte.app.mk| 8 
 mk/rte.sdkinstall.mk | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 9e1909e..ae3328c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -155,6 +155,14 @@ LDLIBS += $(_LDLIBS-y) $(CPU_LDLIBS) $(EXTRA_LDLIBS)

 .PHONY: all
 all: install
+#
+# if H (hierarchy-file) variable is equal "1"
+# install app in /usr/bin/ too.
+#
+ifeq ($(H),1)
+   @[ -d $(BIN_DIR) ] || mkdir -p $(BIN_DIR)
+   $(Q)cp -f $(RTE_OUTPUT)/app/$(APP) $(BIN_DIR)
+endif

 .PHONY: install
 install: build _postinstall
diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 41b8485..723fd97 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -58,8 +58,10 @@ T := 
$(RTE_ARCH)-$(RTE_MACHINE)-$(RTE_EXEC_ENV)-$(RTE_TOOLCHAIN)
 DATA_DIR := $(DESTDIR)/usr/share
 INCLUDE_DIR := $(DESTDIR)/usr/include
 SBIN_DIR := $(DESTDIR)/usr/sbin
+BIN_DIR := $(DESTDIR)/usr/bin
 export INCLUDE_DIR
 export SBIN_DIR
+export BIN_DIR
 endif

 #
-- 
2.1.0



[dpdk-dev] [PATCH 5/7] mk: Add hierarchy-file support (include)

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK headers,
when invoking "make install H=1" (hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html

headers will be installed in: $(DESTDIR)/usr/include

Signed-off-by: Mario Carrillo 
---
 mk/internal/rte.install-post.mk | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mk/internal/rte.install-post.mk b/mk/internal/rte.install-post.mk
index 77addee..20c6a8b 100644
--- a/mk/internal/rte.install-post.mk
+++ b/mk/internal/rte.install-post.mk
@@ -63,9 +63,22 @@ $(addprefix $(RTE_OUTPUT)/$(1)/,$(notdir $(2))): $(2)
$(RTE_OUTPUT)/$(1)
 endef

+#
+# generate rules to install headers in
+# /usr/include
+#
+define copy_header_dir_rule
+ifeq ($(H),1)
+HEADER_DIR := $$(shell echo $(1) | sed 's/include\/*//')
+$$(shell [ -d $(INCLUDE_DIR)/$(HEADER_DIR) ] || mkdir -p 
$(INCLUDE_DIR)/$(HEADER_DIR))
+$$(shell cp -rf $(VPATH)/$(2) $(INCLUDE_DIR)/$(HEADER_DIR))
+endif
+endef
+
 $(foreach dir,$(SYMLINK-DIRS-y),\
$(foreach file,$(SYMLINK-y-$(dir)),\
-   $(eval $(call symlink_rule,$(dir),$(file)
+   $(eval $(call symlink_rule,$(dir),$(file))) \
+   $(eval $(call copy_header_dir_rule,$(dir),$(file)


 # fast way, no need to do preinstall and postinstall
-- 
2.1.0



[dpdk-dev] [PATCH 6/7] mk: Add hierarchy-file support (lib)

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK libs,
when invoking "make install H=1" (hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html

for this case, if the architecture is 64 bits, libs will be
installed in: $(DESTDIR)/usr/lib64; otherwise in $(DESTDIR)/usr/lib

Signed-off-by: Mario Carrillo 
---
 mk/rte.lib.mk| 16 
 mk/rte.sdkinstall.mk |  6 ++
 2 files changed, 22 insertions(+)

diff --git a/mk/rte.lib.mk b/mk/rte.lib.mk
index fcc8e20..3b3136a 100644
--- a/mk/rte.lib.mk
+++ b/mk/rte.lib.mk
@@ -53,6 +53,22 @@ _CLEAN = doclean

 .PHONY: all
 all: install
+#
+# if H (hierarchy-file) variable is equal "1"
+# according to architecture. it will install files 
+# in /usr/lib64/ or /usr/lib 
+#
+ifeq ($(H),1)
+   @[ -d $(LIB_DIR) ] || mkdir -p $(LIB_DIR)
+   $(Q)cp -f $(LIB) $(LIB_DIR)
+ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y)
+ifeq ($(CONFIG_RTE_NEXT_ABI),y)
+   $(Q)ln -s -f $(LIB) $(basename $(basename $(LIB_DIR)/$(LIB)))
+else
+   $(Q)ln -s -f $(LIB) $(basename $(basename $(LIB_DIR)/$(LIB)))
+endif
+endif
+endif

 .PHONY: install
 install: build _postinstall
diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index 723fd97..b506959 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -59,9 +59,15 @@ DATA_DIR := $(DESTDIR)/usr/share
 INCLUDE_DIR := $(DESTDIR)/usr/include
 SBIN_DIR := $(DESTDIR)/usr/sbin
 BIN_DIR := $(DESTDIR)/usr/bin
+ifeq ($(RTE_ARCH),x86_64)
+LIB_DIR := $(DESTDIR)/usr/lib64
+else
+LIB_DIR := $(DESTDIR)/usr/lib
+endif
 export INCLUDE_DIR
 export SBIN_DIR
 export BIN_DIR
+export LIB_DIR
 endif

 #
-- 
2.1.0



[dpdk-dev] [PATCH 7/7] mk: Add hierarchy-file support (linux mod)

2015-09-18 Thread Mario Carrillo
Add hierarchy-file support to the DPDK modules for linux,
when invoking "make install H=1" (hierarchy-file)

This hierarchy is based on:
http://www.freedesktop.org/software/systemd/man/file-hierarchy.html

modules will be installed in: $(DESTDIR)/lib/modules

Signed-off-by: Mario Carrillo 
---
 mk/rte.module.mk | 8 
 mk/rte.sdkinstall.mk | 4 
 2 files changed, 12 insertions(+)

diff --git a/mk/rte.module.mk b/mk/rte.module.mk
index 7bf77c1..4fc43ba 100644
--- a/mk/rte.module.mk
+++ b/mk/rte.module.mk
@@ -59,6 +59,14 @@ compare = $(strip $(subst $(1),,$(2)) $(subst $(2),,$(1)))

 .PHONY: all
 all: install
+#
+# if H (hierarchy-file) variable is equal "1"
+# install modules in /lib/modules/$(KERNEL_DIR).
+#
+ifeq ($(H),1)
+   @[ -d $(MOD_DIR)/$(KERNEL_DIR) ] || mkdir -p $(MOD_DIR)/$(KERNEL_DIR)
+   $(Q)cp -f $(MODULE).ko $(MOD_DIR)/$(KERNEL_DIR)
+endif

 .PHONY: install
 install: build _postinstall
diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk
index b506959..5ddc3f7 100644
--- a/mk/rte.sdkinstall.mk
+++ b/mk/rte.sdkinstall.mk
@@ -59,6 +59,8 @@ DATA_DIR := $(DESTDIR)/usr/share
 INCLUDE_DIR := $(DESTDIR)/usr/include
 SBIN_DIR := $(DESTDIR)/usr/sbin
 BIN_DIR := $(DESTDIR)/usr/bin
+MOD_DIR := $(DESTDIR)/lib/modules
+KERNEL_DIR := $(shell uname -r)/extra
 ifeq ($(RTE_ARCH),x86_64)
 LIB_DIR := $(DESTDIR)/usr/lib64
 else
@@ -68,6 +70,8 @@ export INCLUDE_DIR
 export SBIN_DIR
 export BIN_DIR
 export LIB_DIR
+export MOD_DIR
+export KERNEL_DIR
 endif

 #
-- 
2.1.0