Before this commit, ovs creates one tx and one rx queue for
each dpdk interface and uses only one poll thread for handling
I/O of all dpdk interfaces.  An upcoming patch will allow multiple
poll threads to be created.  In preparation, this commit changes
the defaults so that multiple tx and rx queues are created for
each dpdk interface.

Specifically, the default number of rx queues will be the number
of dpdk interfaces on the interface's numa node, and the upcoming
work will assign each rx queue to a different poll thread.  The
default number of tx queues will be the number of cpu cores on
the machine.  Although not all of the tx queues will be used,
this guarantees that each poll thread has its own queue for
transmission on the dpdk interface.
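
For illustration, a minimal, self-contained sketch of the sizing
rules above.  The fake_* helpers are hypothetical stand-ins for
rte_eth_dev_count(), rte_eth_dev_socket_id() and
ovs_numa_get_n_cores(), which the patch uses for real; the loop
mirrors the dpdk_get_n_devs() added below:

    #include <stdio.h>

    static int fake_dev_count(void) { return 4; }          /* total dpdk ports */
    static int fake_dev_socket_id(int port) { return port % 2; } /* port's numa */
    static int fake_n_cores(void) { return 8; }            /* cpu cores */

    int
    main(void)
    {
        int numa_id = 0;    /* numa node of the interface at hand */
        int n_rxq = 0;
        int n_txq;
        int port;

        /* One rx queue per dpdk iface on the same numa node, so that
         * each rx queue can later be assigned to a different poll
         * thread. */
        for (port = 0; port < fake_dev_count(); port++) {
            if (fake_dev_socket_id(port) == numa_id) {
                n_rxq++;
            }
        }

        /* One tx queue per cpu core, so the poll thread on lcore 'i'
         * can transmit on queue 'i' without contending with other
         * threads. */
        n_txq = fake_n_cores();

        printf("n_rxq=%d n_txq=%d\n", n_rxq, n_txq);
        return 0;
    }

With n_txq equal to the core count, the send path can simply use
the lcore id as the tx queue id, as the last hunk of this patch
does.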

Signed-off-by: Alex Wang <al...@nicira.com>

---
PATCH -> V2
- rebase and refactor the code.
---
 lib/dpif-netdev.h |    1 -
 lib/netdev-dpdk.c |   54 ++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index fb9d0e2..adbbf87 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -42,7 +42,6 @@ static inline void dp_packet_pad(struct ofpbuf *b)
 
 #define NETDEV_QID_NONE INT_MAX
 
-#define NR_QUEUE   1
 #define NR_PMD_THREADS 1
 
 #ifdef  __cplusplus
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4f9c5c2..c7bc4c5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -36,6 +36,7 @@
 #include "odp-util.h"
 #include "ofp-print.h"
 #include "ofpbuf.h"
+#include "ovs-numa.h"
 #include "ovs-thread.h"
 #include "ovs-rcu.h"
 #include "packet-dpif.h"
@@ -154,6 +155,7 @@ struct dpdk_mp {
     struct list list_node OVS_GUARDED_BY(dpdk_mutex);
 };
 
+/* There will be one 'struct dpdk_tx_queue' created for each cpu core. */
 struct dpdk_tx_queue {
     rte_spinlock_t tx_lock;
     int count;
@@ -182,7 +184,7 @@ struct netdev_dpdk {
     int port_id;
     int max_packet_len;
 
-    struct dpdk_tx_queue tx_q[NR_QUEUE];
+    struct dpdk_tx_queue *tx_q;
 
     struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex);
 
@@ -387,6 +389,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
     return NULL;
 }
 
+/* Returns the number of dpdk ifaces on the numa node 'numa_id'. */
+static int
+dpdk_get_n_devs(int numa_id)
+{
+    int count = 0;
+    int i;
+
+    ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
+
+    for (i = 0; i < rte_eth_dev_count(); i++) {
+        if (rte_eth_dev_socket_id(i) == numa_id) {
+            count++;
+        }
+    }
+    ovs_assert(count);
+
+    return count;
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
 {
@@ -399,13 +420,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
         return ENODEV;
     }
 
-    diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, &port_conf);
+    diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq,
+                                 &port_conf);
     if (diag) {
         VLOG_ERR("eth dev config error %d",diag);
         return -diag;
     }
 
-    for (i = 0; i < NR_QUEUE; i++) {
+    for (i = 0; i < dev->up.n_txq; i++) {
         diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
                                       dev->socket_id, &tx_conf);
         if (diag) {
@@ -414,7 +436,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
         }
     }
 
-    for (i = 0; i < NR_QUEUE; i++) {
+    for (i = 0; i < dev->up.n_rxq; i++) {
         diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE,
                                       dev->socket_id,
                                       &rx_conf, dev->dpdk_mp->mp);
@@ -466,15 +488,27 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
 {
     struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
     int err = 0;
-    int i;
+    int n_cores, i;
 
     ovs_mutex_init(&netdev->mutex);
 
     ovs_mutex_lock(&netdev->mutex);
 
-    for (i = 0; i < NR_QUEUE; i++) {
+    /* There can only be ovs_numa_get_n_cores() pmd threads, so create a tx_q
+     * for each of them. */
+    n_cores = ovs_numa_get_n_cores();
+    if (n_cores == OVS_CORE_UNSPEC) {
+        VLOG_WARN_RL(&rl, "netdev_dpdk txq creation failed due to no cpu"
+                     " core info");
+        err = ENOENT;
+        goto unlock;
+    }
+    netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q);
+    for (i = 0; i < n_cores; i++) {
         rte_spinlock_init(&netdev->tx_q[i].tx_lock);
     }
+    netdev_->n_txq = n_cores;
+    netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id);
 
     netdev->port_id = port_no;
 
@@ -495,12 +529,13 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
     if (err) {
         goto unlock;
     }
-    netdev_->n_txq = NR_QUEUE;
-    netdev_->n_rxq = NR_QUEUE;
 
     list_push_back(&dpdk_list, &netdev->list_node);
 
 unlock:
+    if (err) {
+        rte_free(netdev->tx_q);
+    }
     ovs_mutex_unlock(&netdev->mutex);
     return err;
 }
@@ -552,6 +587,7 @@ netdev_dpdk_destruct(struct netdev *netdev_)
     ovs_mutex_unlock(&dev->mutex);
 
     ovs_mutex_lock(&dpdk_mutex);
+    rte_free(dev->tx_q);
     list_remove(&dev->list_node);
     dpdk_mp_put(dev->dpdk_mp);
     ovs_mutex_unlock(&dpdk_mutex);
@@ -812,7 +848,7 @@ netdev_dpdk_send(struct netdev *netdev, int qid, struct dpif_packet **pkts,
         int next_tx_idx = 0;
         int dropped = 0;
 
-        qid = rte_lcore_id() % NR_QUEUE;
+        qid = rte_lcore_id();
 
         for (i = 0; i < cnt; i++) {
             int size = ofpbuf_size(&pkts[i]->ofpbuf);
-- 
1.7.9.5
