This RFC is based on the vhost multiple queues work on dpdk-dev: http://dpdk.org/ml/archives/dev/2015-June/019345.html
Signed-off-by: Flavio Leitner <f...@redhat.com>
---
 lib/netdev-dpdk.c | 61 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5ae805e..493172c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -215,12 +215,9 @@ struct netdev_dpdk {
      * If the numbers match, 'txq_needs_locking' is false, otherwise it is
      * true and we will take a spinlock on transmission */
     int real_n_txq;
+    int real_n_rxq;
     bool txq_needs_locking;
 
-    /* Spinlock for vhost transmission. Other DPDK devices use spinlocks in
-     * dpdk_tx_queue */
-    rte_spinlock_t vhost_tx_lock;
-
     /* virtio-net structure for vhost device */
     OVSRCU_TYPE(struct virtio_net *) virtio_dev;
 
@@ -602,13 +599,10 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
 static int
 vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
 {
-    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
-
     if (rte_eal_init_ret) {
         return rte_eal_init_ret;
     }
 
-    rte_spinlock_init(&netdev->vhost_tx_lock);
     return netdev_dpdk_init(netdev_, -1, DPDK_DEV_VHOST);
 }
 
@@ -791,9 +785,16 @@ netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq,
     ovs_mutex_lock(&dpdk_mutex);
     ovs_mutex_lock(&netdev->mutex);
 
+    rte_free(netdev->tx_q);
+    /* FIXME: the number of vqueues needs to match */
     netdev->up.n_txq = n_txq;
-    netdev->real_n_txq = 1;
-    netdev->up.n_rxq = 1;
+    netdev->up.n_rxq = n_rxq;
+
+    /* vring has txq = rxq */
+    netdev->real_n_txq = n_rxq;
+    netdev->real_n_rxq = n_rxq;
+    netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq;
+    netdev_dpdk_alloc_txq(netdev, netdev->up.n_txq);
 
     ovs_mutex_unlock(&netdev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
@@ -904,14 +905,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_,
     struct netdev *netdev = rx->up.netdev;
     struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
-    int qid = 1;
+    int qid = rxq_->queue_id;
     uint16_t nb_rx = 0;
 
     if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
         return EAGAIN;
     }
 
-    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid,
+    nb_rx = rte_vhost_dequeue_burst(virtio_dev, VIRTIO_TXQ + qid * 2,
                                     vhost_dev->dpdk_mp->mp,
                                     (struct rte_mbuf **)packets,
                                     NETDEV_MAX_BURST);
@@ -958,8 +959,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
 }
 
 static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
-                         int cnt, bool may_steal)
+__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+                         struct dp_packet **pkts, int cnt,
+                         bool may_steal)
 {
     struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
@@ -974,13 +976,16 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
         goto out;
     }
 
-    /* There is vHost TX single queue, So we need to lock it for TX. */
-    rte_spinlock_lock(&vhost_dev->vhost_tx_lock);
+    if (vhost_dev->txq_needs_locking) {
+        qid = qid % vhost_dev->real_n_txq;
+        rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock);
+    }
 
     do {
+        int vhost_qid = VIRTIO_RXQ + qid * VIRTIO_QNUM;
         unsigned int tx_pkts;
 
-        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ,
+        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
                                           cur_pkts, cnt);
         if (OVS_LIKELY(tx_pkts)) {
             /* Packets have been sent.*/
@@ -999,7 +1004,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
              * Unable to enqueue packets to vhost interface.
              * Check available entries before retrying.
              */
-            while (!rte_vring_available_entries(virtio_dev, VIRTIO_RXQ)) {
+            while (!rte_vring_available_entries(virtio_dev, vhost_qid)) {
                 if (OVS_UNLIKELY((rte_get_timer_cycles() - start) > timeout)) {
                     expired = 1;
                     break;
@@ -1011,7 +1016,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
             }
         }
     } while (cnt);
-    rte_spinlock_unlock(&vhost_dev->vhost_tx_lock);
+
+    if (vhost_dev->txq_needs_locking) {
+        rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock);
+    }
 
     rte_spinlock_lock(&vhost_dev->stats_lock);
     vhost_dev->stats.tx_packets += (total_pkts - cnt);
@@ -1116,7 +1124,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
     }
 
     if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, (struct dp_packet **) mbufs, newcnt, true);
+        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs, newcnt, true);
     } else {
         dpdk_queue_pkts(dev, qid, mbufs, newcnt);
         dpdk_queue_flush(dev, qid);
@@ -1128,7 +1136,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
 }
 
 static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_packet **pkts,
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet **pkts,
                        int cnt, bool may_steal)
 {
     if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
@@ -1141,7 +1149,7 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_pack
             }
         }
     } else {
-        __netdev_dpdk_vhost_send(netdev, pkts, cnt, may_steal);
+        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
     }
     return 0;
 }
@@ -1656,7 +1664,18 @@ new_device(struct virtio_net *dev)
     /* Add device to the vhost port with the same name as that passed down. */
     LIST_FOR_EACH(netdev, list_node, &dpdk_list) {
         if (strncmp(dev->ifname, netdev->vhost_id, IF_NAME_SZ) == 0) {
+            int qp_num = rte_vhost_qp_num_get(dev);
             ovs_mutex_lock(&netdev->mutex);
+            if (qp_num != netdev->real_n_rxq) {
+                VLOG_INFO("vHost Device '%s' (%ld) can't be added - "
+                          "unmatched number of queues %d and %d",
+                          dev->ifname, dev->device_fh, qp_num,
+                          netdev->real_n_rxq);
+                ovs_mutex_unlock(&netdev->mutex);
+                ovs_mutex_unlock(&dpdk_mutex);
+                return -1;
+            }
+
             ovsrcu_set(&netdev->virtio_dev, dev);
             ovs_mutex_unlock(&netdev->mutex);
             exists = true;
-- 
2.1.0
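A note on the vring numbering used in the receive and send paths above: each virtio queue pair 'qid' owns two vrings, so the OVS queue id is scaled by VIRTIO_QNUM (2) before it is handed to the vhost library; the receive hunk spells the same arithmetic as "VIRTIO_TXQ + qid * 2". The sketch below is illustrative only and is not part of the patch: the helper names are made up here, and it assumes the VIRTIO_RXQ, VIRTIO_TXQ and VIRTIO_QNUM definitions from DPDK's rte_virtio_net.h.

#include <stdint.h>
#include <rte_virtio_net.h>   /* VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM */

/* Vring the host reads from for OVS rx queue 'qid' (the guest's TX ring);
 * this is the index passed to rte_vhost_dequeue_burst() above. */
static inline uint16_t
vhost_rx_vring_id(int qid)
{
    return VIRTIO_TXQ + qid * VIRTIO_QNUM;
}

/* Vring the host writes into for OVS tx queue 'qid' (the guest's RX ring);
 * this is the index passed to rte_vhost_enqueue_burst() above. */
static inline uint16_t
vhost_tx_vring_id(int qid)
{
    return VIRTIO_RXQ + qid * VIRTIO_QNUM;
}

With VIRTIO_RXQ == 0 and VIRTIO_TXQ == 1, queue pair 0 maps to vrings 0 and 1, queue pair 1 to vrings 2 and 3, and so on; this is also why new_device() above refuses the device when the queue-pair count reported by rte_vhost_qp_num_get() does not match real_n_rxq.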