Add multiqueue support to DPDK vhost ports: the single vhost_tx_lock is
replaced by the per-queue tx_q[] spinlocks, rx/tx queue ids are mapped onto
virtio queue pairs, and netdev_dpdk_vhost_set_multiq() now sets real_n_txq
and real_n_rxq from the requested rx queue count.

This RFC is based on the vhost multiple queues work on
dpdk-dev: http://dpdk.org/ml/archives/dev/2015-June/019345.html
Signed-off-by: Flavio Leitner <f...@redhat.com>
---
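Note (for reviewers, not part of the commit message): the queue ids used in
the diff below are OVS queue ids, not vring indexes. Vhost virtqueues come in
RX/TX pairs, so a translation is needed before calling
rte_vhost_enqueue_burst() or rte_vhost_dequeue_burst(). A minimal sketch of
that mapping follows; VIRTIO_RXQ, VIRTIO_TXQ and VIRTIO_QNUM come from DPDK's
rte_virtio_net.h, while the two helper names are illustrative only and do not
appear in the patch.

#include <rte_virtio_net.h>

/* OVS rxq i reads from the guest's TX vring of queue pair i. */
static inline int
ovs_rxq_to_vring(int rxq)
{
    return VIRTIO_TXQ + rxq * VIRTIO_QNUM;
}

/* OVS txq i writes to the guest's RX vring of queue pair i. */
static inline int
ovs_txq_to_vring(int txq)
{
    return VIRTIO_RXQ + txq * VIRTIO_QNUM;
}

For example, with two queue pairs and VIRTIO_QNUM == 2, OVS rxq 1 dequeues
from vring index 3 and OVS txq 1 enqueues to vring index 2.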
lib/netdev-dpdk.c | 61 ++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 21 deletions(-)
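Note (for reviewers): a minimal sketch of the tx locking fallback that
replaces the old vhost_tx_lock. ovs-vswitchd may configure more tx queues
(up.n_txq) than the guest negotiates (real_n_txq); when the counts differ,
several OVS tx queue ids fold onto the same vring and senders must serialize
on that vring's spinlock. The struct and function names below are
hypothetical stand-ins for the per-queue tx_q[].tx_lock used in
netdev-dpdk.c.

#include <stdbool.h>
#include <rte_spinlock.h>

struct vhost_tx_state {
    int n_txq;                 /* tx queues the datapath was configured with */
    int real_n_txq;            /* queue pairs the guest actually provides */
    bool txq_needs_locking;    /* true when the two counts differ */
    rte_spinlock_t *tx_locks;  /* one lock per real tx queue */
};

static void
vhost_send_sketch(struct vhost_tx_state *s, int qid /*, packets, cnt */)
{
    if (s->txq_needs_locking) {
        qid = qid % s->real_n_txq;            /* fold onto a real vring */
        rte_spinlock_lock(&s->tx_locks[qid]);
    }

    /* ... rte_vhost_enqueue_burst(dev, VIRTIO_RXQ + qid * VIRTIO_QNUM, ...) ... */

    if (s->txq_needs_locking) {
        rte_spinlock_unlock(&s->tx_locks[qid]);
    }
}

When real_n_txq equals up.n_txq, each pmd thread owns its queue exclusively
and the lock is skipped entirely, which is the common case once the guest
enables all of its queue pairs.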
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5ae805e..493172c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -215,12 +215,9 @@ struct netdev_dpdk {
* If the numbers match, 'txq_needs_locking' is false, otherwise it is
* true and we will take a spinlock on transmission */
int real_n_txq;
+ int real_n_rxq;
bool txq_needs_locking;
- /* Spinlock for vhost transmission. Other DPDK devices use spinlocks in
- * dpdk_tx_queue */
- rte_spinlock_t vhost_tx_lock;
-
/* virtio-net structure for vhost device */
OVSRCU_TYPE(struct virtio_net *) virtio_dev;
@@ -602,13 +599,10 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
static int
vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
{
- struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
-
if (rte_eal_init_ret) {
return rte_eal_init_ret;
}
- rte_spinlock_init(&netdev->vhost_tx_lock);
return netdev_dpdk_init(netdev_, -1, DPDK_DEV_VHOST);
}
@@ -791,9 +785,16 @@ netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq,
ovs_mutex_lock(&dpdk_mutex);
ovs_mutex_lock(&netdev->mutex);
+ rte_free(netdev->tx_q);
+ /* FIXME: the number of vqueues needs to match */
netdev->up.n_txq = n_txq;
- netdev->real_n_txq = 1;
- netdev->up.n_rxq = 1;
+ netdev->up.n_rxq = n_rxq;
+
+ /* vring has txq = rxq */
+ netdev->real_n_txq = n_rxq;
+ netdev->real_n_rxq = n_rxq;
+ netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq;
+ netdev_dpdk_alloc_txq(netdev, netdev->up.n_txq);
ovs_mutex_unlock(&netdev->mutex);
ovs_mutex_unlock(&dpdk_mutex);
@@ -904,14 +905,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_,
struct netdev *netdev = rx->up.netdev;
struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
- int qid = 1;
+ int qid = rxq_->queue_id;
uint16_t nb_rx = 0;
if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
return EAGAIN;
}
- nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid,
+ nb_rx = rte_vhost_dequeue_burst(virtio_dev, VIRTIO_TXQ + qid * 2,
vhost_dev->dpdk_mp->mp,
(struct rte_mbuf **)packets,
NETDEV_MAX_BURST);
@@ -958,8 +959,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
}
static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
- int cnt, bool may_steal)
+__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+ struct dp_packet **pkts, int cnt,
+ bool may_steal)
{
struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
@@ -974,13 +976,16 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
goto out;
}
- /* There is vHost TX single queue, So we need to lock it for TX. */
- rte_spinlock_lock(&vhost_dev->vhost_tx_lock);
+ if (vhost_dev->txq_needs_locking) {
+ qid = qid % vhost_dev->real_n_txq;
+ rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock);
+ }
do {
+ int vhost_qid = VIRTIO_RXQ + qid * VIRTIO_QNUM;
unsigned int tx_pkts;
- tx_pkts = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ,
+ tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
cur_pkts, cnt);
if (OVS_LIKELY(tx_pkts)) {
/* Packets have been sent.*/
@@ -999,7 +1004,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
* Unable to enqueue packets to vhost interface.
* Check available entries before retrying.
*/
- while (!rte_vring_available_entries(virtio_dev, VIRTIO_RXQ)) {
+ while (!rte_vring_available_entries(virtio_dev, vhost_qid)) {
if (OVS_UNLIKELY((rte_get_timer_cycles() - start) > timeout)) {
expired = 1;
break;
@@ -1011,7 +1016,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
}
}
} while (cnt);
- rte_spinlock_unlock(&vhost_dev->vhost_tx_lock);
+
+ if (vhost_dev->txq_needs_locking) {
+ rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock);
+ }
rte_spinlock_lock(&vhost_dev->stats_lock);
vhost_dev->stats.tx_packets += (total_pkts - cnt);
@@ -1116,7 +1124,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
}
if (dev->type == DPDK_DEV_VHOST) {
- __netdev_dpdk_vhost_send(netdev, (struct dp_packet **) mbufs, newcnt, true);
+ __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs, newcnt, true);
} else {
dpdk_queue_pkts(dev, qid, mbufs, newcnt);
dpdk_queue_flush(dev, qid);
@@ -1128,7 +1136,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
}
static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_packet **pkts,
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet **pkts,
int cnt, bool may_steal)
{
if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
@@ -1141,7 +1149,7 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_pack
}
}
} else {
- __netdev_dpdk_vhost_send(netdev, pkts, cnt, may_steal);
+ __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
}
return 0;
}
@@ -1656,7 +1664,18 @@ new_device(struct virtio_net *dev)
/* Add device to the vhost port with the same name as that passed down. */
LIST_FOR_EACH(netdev, list_node, &dpdk_list) {
if (strncmp(dev->ifname, netdev->vhost_id, IF_NAME_SZ) == 0) {
+ int qp_num = rte_vhost_qp_num_get(dev);