Currently, the virtio driver in the guest operating system has to be configured to use exactly the same number of queues. If the number of queues is smaller, some packets will get stuck in queues unused by the guest and will not be received.
Fix that by using new 'vring_state_changed' callback, which is available for vhost-user since DPDK 2.2. Implementation uses additional mapping from configured tx queues to enabled by virtio driver. This requires mandatory locking of TX queues in __netdev_dpdk_vhost_send(), but this locking was almost always anyway because of calling set_multiq with n_txq = 'ovs_numa_get_n_cores() + 1'. Fixes: 4573fbd38fa1 ("netdev-dpdk: Add vhost-user multiqueue support") Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> Reviewed-by: Aaron Conole <acon...@redhat.com> Acked-by: Flavio Leitner <f...@sysclose.org> --- Version 3: * Fixed size in dpdk_rte_mzalloc() for enabled_queues. * Fixed possible segfault because of unallocated tx_q[]. * Added remap of tx queues after real_n_txq modification in netdev_dpdk_vhost_set_queues(). lib/netdev-dpdk.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 97 insertions(+), 15 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index e4f789b..3e69e50 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -173,6 +173,8 @@ struct dpdk_tx_queue { * from concurrent access. It is used only * if the queue is shared among different * pmd threads (see 'txq_needs_locking'). */ + int map; /* Mapping of configured vhost-user queues + * to enabled by guest. */ uint64_t tsc; struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; }; @@ -572,6 +574,9 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *netdev, unsigned int n_txqs) /* Queues are shared among CPUs. Always flush */ netdev->tx_q[i].flush_tx = true; } + + /* Initialize map for vhost devices. 
*/ + netdev->tx_q[i].map = -1; rte_spinlock_init(&netdev->tx_q[i].tx_lock); } } @@ -623,6 +628,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, if (err) { goto unlock; } + } else { + netdev_dpdk_alloc_txq(netdev, VHOST_MAX_QUEUE_PAIRS); } list_push_back(&dpdk_list, &netdev->list_node); @@ -889,10 +896,8 @@ netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq, ovs_mutex_lock(&dpdk_mutex); ovs_mutex_lock(&netdev->mutex); - rte_free(netdev->tx_q); netdev->up.n_txq = n_txq; netdev->up.n_rxq = n_rxq; - netdev_dpdk_alloc_txq(netdev, netdev->up.n_txq); ovs_mutex_unlock(&netdev->mutex); ovs_mutex_unlock(&dpdk_mutex); @@ -1117,17 +1122,16 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, unsigned int total_pkts = cnt; uint64_t start = 0; - if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) { + qid = vhost_dev->tx_q[qid % vhost_dev->real_n_txq].map; + + if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid == -1)) { rte_spinlock_lock(&vhost_dev->stats_lock); vhost_dev->stats.tx_dropped+= cnt; rte_spinlock_unlock(&vhost_dev->stats_lock); goto out; } - if (vhost_dev->txq_needs_locking) { - qid = qid % vhost_dev->real_n_txq; - rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock); - } + rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock); do { int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ; @@ -1165,9 +1169,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, } } while (cnt); - if (vhost_dev->txq_needs_locking) { - rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock); - } + rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock); rte_spinlock_lock(&vhost_dev->stats_lock); netdev_dpdk_vhost_update_tx_counters(&vhost_dev->stats, pkts, total_pkts, @@ -1827,6 +1829,46 @@ set_irq_status(struct virtio_net *dev) } } +/* + * Fixes mapping for vhost-user tx queues. Must be called after each + * enabling/disabling of queues and real_n_txq modifications. 
+ */ +static void +netdev_dpdk_remap_txqs(struct netdev_dpdk *netdev) + OVS_REQUIRES(netdev->mutex) +{ + int *enabled_queues, n_enabled = 0; + int i, k, total_txqs = netdev->real_n_txq; + + enabled_queues = dpdk_rte_mzalloc(total_txqs * sizeof *enabled_queues); + + for (i = 0; i < total_txqs; i++) { + /* Enabled queues always mapped to themselves. */ + if (netdev->tx_q[i].map == i) { + enabled_queues[n_enabled++] = i; + } + } + + if (n_enabled == 0 && total_txqs != 0) { + enabled_queues[0] = -1; + n_enabled = 1; + } + + k = 0; + for (i = 0; i < total_txqs; i++) { + if (netdev->tx_q[i].map != i) { + netdev->tx_q[i].map = enabled_queues[k]; + k = (k + 1) % n_enabled; + } + } + + VLOG_DBG("TX queue mapping for %s\n", netdev->vhost_id); + for (i = 0; i < total_txqs; i++) { + VLOG_DBG("%2d --> %2d", i, netdev->tx_q[i].map); + } + + rte_free(enabled_queues); +} static int netdev_dpdk_vhost_set_queues(struct netdev_dpdk *netdev, struct virtio_net *dev) @@ -1843,11 +1885,9 @@ netdev_dpdk_vhost_set_queues(struct netdev_dpdk *netdev, struct virtio_net *dev) netdev->real_n_rxq = qp_num; netdev->real_n_txq = qp_num; - if (netdev->up.n_txq > netdev->real_n_txq) { - netdev->txq_needs_locking = true; - } else { - netdev->txq_needs_locking = false; - } + netdev->txq_needs_locking = true; + + netdev_dpdk_remap_txqs(netdev); return 0; } @@ -1941,6 +1981,47 @@ destroy_device(volatile struct virtio_net *dev) } +static int +vring_state_changed(struct virtio_net *dev, uint16_t queue_id, int enable) +{ + struct netdev_dpdk *vhost_dev; + bool exists = false; + int qid = queue_id / VIRTIO_QNUM; + + if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) { + return 0; + } + + ovs_mutex_lock(&dpdk_mutex); + LIST_FOR_EACH (vhost_dev, list_node, &dpdk_list) { + if (strncmp(dev->ifname, vhost_dev->vhost_id, IF_NAME_SZ) == 0) { + ovs_mutex_lock(&vhost_dev->mutex); + if (enable) { + vhost_dev->tx_q[qid].map = qid; + } else { + vhost_dev->tx_q[qid].map = -1; + } + netdev_dpdk_remap_txqs(vhost_dev); + exists = 
true; + ovs_mutex_unlock(&vhost_dev->mutex); + break; + } + } + ovs_mutex_unlock(&dpdk_mutex); + + if (exists) { + VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %" + PRIu64" changed to \'%s\'", queue_id, qid, dev->ifname, + dev->device_fh, (enable == 1) ? "enabled" : "disabled"); + } else { + VLOG_INFO("vHost Device '%s' %"PRIu64" not found", dev->ifname, + dev->device_fh); + return -1; + } + + return 0; +} + struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev) { @@ -1955,6 +2036,7 @@ static const struct virtio_net_device_ops virtio_net_device_ops = { .new_device = new_device, .destroy_device = destroy_device, + .vring_state_changed = vring_state_changed }; static void * -- 2.5.0 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev