The previous commit makes OVS create one tx queue for each cpu
core.  An upcoming patch will allow multiple pmd threads to be
created and pinned to cpu cores, so that each pmd thread will
use the tx queue corresponding to its core id.

Moreover, a pmd thread running on a different numa node than
the dpdk interface (called a non-local pmd thread) will not
handle the rx of the interface.  Consequently, there needs to
be a way to flush the tx queues used by non-local pmd threads.

To address the queue flushing issue, this commit introduces a
new flag, 'flush_tx', in 'struct dpdk_tx_queue', which is set
if the queue is to be used by a non-local pmd thread.  When tx
packets are enqueued on a queue with the flag set, the queue is
flushed immediately after the enqueue.
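
The flag is computed once at device init time, by comparing each
core's numa id against the device's socket id.  A minimal sketch of
that decision, assuming ovs_numa_get_numa_id() from lib/ovs-numa.h
(the queue index doubles as the core id, per the previous commit):

    for (i = 0; i < n_cores; i++) {
        int numa_id = ovs_numa_get_numa_id(i);

        /* A non-local pmd thread never polls this device's rx
         * queues, so nothing else would flush its tx queue. */
        netdev->tx_q[i].flush_tx = netdev->socket_id != numa_id;
    }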

Signed-off-by: Alex Wang <al...@nicira.com>

---
PATCH -> V2
- rebase and refactor the code.
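
For reference, here is a self-contained toy of the enqueue/flush
pattern this patch adds (hypothetical 'toy_*' names, plain C, no
DPDK; the real code is in the diff below):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_TX_QUEUE_LEN 32

    struct toy_tx_queue {
        bool flush_tx;    /* Flush after every enqueue. */
        int count;        /* Packets currently buffered. */
        int burst_pkts[MAX_TX_QUEUE_LEN];
    };

    static void
    toy_queue_flush(struct toy_tx_queue *txq)
    {
        if (txq->count) {
            printf("sending burst of %d pkts\n", txq->count);
            txq->count = 0;
        }
    }

    static void
    toy_queue_pkt(struct toy_tx_queue *txq, int pkt)
    {
        txq->burst_pkts[txq->count++] = pkt;
        /* Send when the burst fills up, or immediately if the
         * queue belongs to a non-local pmd thread. */
        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
            toy_queue_flush(txq);
        }
    }

    int
    main(void)
    {
        struct toy_tx_queue local = { .flush_tx = false };
        struct toy_tx_queue nonlocal = { .flush_tx = true };
        int pkt;

        for (pkt = 0; pkt < 3; pkt++) {
            toy_queue_pkt(&local, pkt);     /* Buffered, nothing sent. */
            toy_queue_pkt(&nonlocal, pkt);  /* Sent at once. */
        }
        toy_queue_flush(&local);            /* Drains the 3 buffered pkts. */
        return 0;
    }

With 'flush_tx' set, each packet goes out as a burst of one; without
it, packets sit in the burst buffer until it fills or someone flushes
the queue, which is exactly what never happens for a non-local pmd
thread.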
---
 lib/netdev-dpdk.c |   15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c7bc4c5..d6bf0bd 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -158,6 +158,8 @@ struct dpdk_mp {
 /* There will be one 'struct dpdk_tx_queue' created for each cpu core. */
 struct dpdk_tx_queue {
     rte_spinlock_t tx_lock;
+    bool flush_tx;                 /* Set to true to flush the queue every
+                                    * time packets are enqueued. */
     int count;
     uint64_t tsc;
     struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
@@ -494,6 +496,9 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
 
     ovs_mutex_lock(&netdev->mutex);
 
+    /* XXX: need to discover device node at run time. */
+    netdev->socket_id = SOCKET0;
+
     /* There can only be ovs_numa_get_n_cores() pmd threads, so create a tx_q
      * for each of them. */
     n_cores = ovs_numa_get_n_cores();
@@ -505,7 +510,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
     }
     netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q);
     for (i = 0; i < n_cores; i++) {
+        int numa_id = ovs_numa_get_numa_id(i);
+
         rte_spinlock_init(&netdev->tx_q[i].tx_lock);
+        /* If the corresponding core is not on the same numa node
+         * as 'netdev', set 'flush_tx'. */
+        netdev->tx_q[i].flush_tx = netdev->socket_id != numa_id;
     }
     netdev_->n_txq = n_cores;
     netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id);
@@ -516,9 +526,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk
     netdev->mtu = ETHER_MTU;
     netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
 
-    /* XXX: need to discover device node at run time. */
-    netdev->socket_id = SOCKET0;
-
     netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
     if (!netdev->dpdk_mp) {
         err = ENOMEM;
@@ -758,7 +765,7 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
         txq->count += tocopy;
         i += tocopy;
 
-        if (txq->count == MAX_TX_QUEUE_LEN) {
+        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
             dpdk_queue_flush__(dev, qid);
         }
         diff_tsc = rte_get_timer_cycles() - txq->tsc;
-- 
1.7.9.5
