On 8/6/15 5:29 PM, Flavio Leitner wrote:
On Thu, Aug 06, 2015 at 03:24:29PM -0400, Thomas F Herbert wrote:
On 8/6/15 1:40 PM, Flavio Leitner wrote:
On Thu, Aug 06, 2015 at 12:39:29PM -0400, Thomas F Herbert wrote:
On 7/31/15 6:30 PM, Flavio Leitner wrote:
This RFC is based on the vhost multiple queues work on
dpdk-dev: http://dpdk.org/ml/archives/dev/2015-June/019345.html

Signed-off-by: Flavio Leitner <f...@redhat.com>
---
  lib/netdev-dpdk.c | 61 ++++++++++++++++++++++++++++++++++++-------------------
  1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5ae805e..493172c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -215,12 +215,9 @@ struct netdev_dpdk {
       * If the numbers match, 'txq_needs_locking' is false, otherwise it is
       * true and we will take a spinlock on transmission */
      int real_n_txq;
+    int real_n_rxq;
      bool txq_needs_locking;

-    /* Spinlock for vhost transmission.  Other DPDK devices use spinlocks in
-     * dpdk_tx_queue */
-    rte_spinlock_t vhost_tx_lock;
-
      /* virtio-net structure for vhost device */
      OVSRCU_TYPE(struct virtio_net *) virtio_dev;

@@ -602,13 +599,10 @@ dpdk_dev_parse_name(const char dev_name[], const char 
prefix[],
  static int
  vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
  {
-    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
-
      if (rte_eal_init_ret) {
          return rte_eal_init_ret;
      }

-    rte_spinlock_init(&netdev->vhost_tx_lock);
      return netdev_dpdk_init(netdev_, -1, DPDK_DEV_VHOST);
  }

@@ -791,9 +785,16 @@ netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, 
unsigned int n_txq,
      ovs_mutex_lock(&dpdk_mutex);
      ovs_mutex_lock(&netdev->mutex);

+    rte_free(netdev->tx_q);
+    /* FIXME: the number of vqueues needs to match */
Do you still need this "FIXME?" Isn't the code you added below freeing and
re-allocating the correct number of tx queues?

Yes, because that is about virtual queues provided by qemu.
Thanks,
fbl
I understand this is an RFC but I think your patch is in the right direction. I know the merging is complex and requires upstream changes to DPDK and Qemu. I ack this patch is an important step that moves the ball forward toward vhost user performance of DPDK accelerated OVS.

--TFH

      netdev->up.n_txq = n_txq;
-    netdev->real_n_txq = 1;
-    netdev->up.n_rxq = 1;
+    netdev->up.n_rxq = n_rxq;
+
+    /* vring has txq = rxq */
+    netdev->real_n_txq = n_rxq;
+    netdev->real_n_rxq = n_rxq;
+    netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq;
+    netdev_dpdk_alloc_txq(netdev, netdev->up.n_txq);

      ovs_mutex_unlock(&netdev->mutex);
      ovs_mutex_unlock(&dpdk_mutex);
@@ -904,14 +905,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq_,
      struct netdev *netdev = rx->up.netdev;
      struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
-    int qid = 1;
+    int qid = rxq_->queue_id;
      uint16_t nb_rx = 0;

      if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) {
          return EAGAIN;
      }

-    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid,
+    nb_rx = rte_vhost_dequeue_burst(virtio_dev, VIRTIO_TXQ + qid * 2,
                                      vhost_dev->dpdk_mp->mp,
                                      (struct rte_mbuf **)packets,
                                      NETDEV_MAX_BURST);
@@ -958,8 +959,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct 
dp_packet **packets,
  }

  static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
-                         int cnt, bool may_steal)
+__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+                         struct dp_packet **pkts, int cnt,
+                         bool may_steal)
  {
      struct netdev_dpdk *vhost_dev = netdev_dpdk_cast(netdev);
      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(vhost_dev);
@@ -974,13 +976,16 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct 
dp_packet **pkts,
          goto out;
      }

-    /* There is vHost TX single queue, So we need to lock it for TX. */
-    rte_spinlock_lock(&vhost_dev->vhost_tx_lock);
+    if (vhost_dev->txq_needs_locking) {
+        qid = qid % vhost_dev->real_n_txq;
+        rte_spinlock_lock(&vhost_dev->tx_q[qid].tx_lock);
+    }

      do {
+        int vhost_qid = VIRTIO_RXQ + qid * VIRTIO_QNUM;
          unsigned int tx_pkts;

-        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, VIRTIO_RXQ,
+        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
                                            cur_pkts, cnt);
          if (OVS_LIKELY(tx_pkts)) {
              /* Packets have been sent.*/
@@ -999,7 +1004,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct 
dp_packet **pkts,
               * Unable to enqueue packets to vhost interface.
               * Check available entries before retrying.
               */
-            while (!rte_vring_available_entries(virtio_dev, VIRTIO_RXQ)) {
+            while (!rte_vring_available_entries(virtio_dev, vhost_qid)) {
                  if (OVS_UNLIKELY((rte_get_timer_cycles() - start) > timeout)) 
{
                      expired = 1;
                      break;
@@ -1011,7 +1016,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, struct 
dp_packet **pkts,
              }
          }
      } while (cnt);
-    rte_spinlock_unlock(&vhost_dev->vhost_tx_lock);
+
+    if (vhost_dev->txq_needs_locking) {
+        rte_spinlock_unlock(&vhost_dev->tx_q[qid].tx_lock);
+    }

      rte_spinlock_lock(&vhost_dev->stats_lock);
      vhost_dev->stats.tx_packets += (total_pkts - cnt);
@@ -1116,7 +1124,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet **pkts,
      }

      if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, (struct dp_packet **) mbufs, newcnt, 
true);
+        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs, 
newcnt, true);
      } else {
          dpdk_queue_pkts(dev, qid, mbufs, newcnt);
          dpdk_queue_flush(dev, qid);
@@ -1128,7 +1136,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet **pkts,
  }

  static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct 
dp_packet **pkts,
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet **pkts,
                   int cnt, bool may_steal)
  {
      if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
@@ -1141,7 +1149,7 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid 
OVS_UNUSED, struct dp_pack
              }
          }
      } else {
-        __netdev_dpdk_vhost_send(netdev, pkts, cnt, may_steal);
+        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
      }
      return 0;
  }
@@ -1656,7 +1664,18 @@ new_device(struct virtio_net *dev)
      /* Add device to the vhost port with the same name as that passed down. */
      LIST_FOR_EACH(netdev, list_node, &dpdk_list) {
          if (strncmp(dev->ifname, netdev->vhost_id, IF_NAME_SZ) == 0) {
+            int qp_num = rte_vhost_qp_num_get(dev);
Hi Flavio.

rte_vhost_qp_num_get() is in the multiple queue patch for upstream DPDK,
referenced above, http://dpdk.org/ml/archives/dev/2015-June/019345.html. It
gets the max number of virt queues for the device or 0. Is this unnecessary
locking of the netdev dev, for each queue associated with the device? Should
the test for qp_num below be before the netdev lock?

It only compares the number of queues if the device is the
one we are looking for, so it happens only a single time, not per queue.
And the real_n_rxq is a property of the datapath which can change,
hence the need for the lock.

fbl
OK, Thanks!

              ovs_mutex_lock(&netdev->mutex);
+            if (qp_num != netdev->real_n_rxq) {
+                VLOG_INFO("vHost Device '%s' (%ld) can't be added - "
+                          "unmatched number of queues %d and %d",
+                          dev->ifname, dev->device_fh, qp_num,
+                          netdev->real_n_rxq);
+                ovs_mutex_unlock(&netdev->mutex);
+                ovs_mutex_unlock(&dpdk_mutex);
+                return -1;
+            }
+
              ovsrcu_set(&netdev->virtio_dev, dev);
              ovs_mutex_unlock(&netdev->mutex);
              exists = true;


_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev



_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev



_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to