I applied this to master, thanks.
On 27/06/2016 06:28, "Ilya Maximets" <i.maxim...@samsung.com> wrote:
>Current implementation of TX packet queueing is broken in several ways:
>
> * TX queue flushing implemented on receive assumes that all
>   core_id-s are sequential and start from zero. This may lead
>   to a situation where packets get stuck in the queue forever,
>   and it also hurts latency.
>
> * For a long time the flushing logic has depended on the
>   uninitialized 'txq_needs_locking', because it is usually
>   calculated after 'netdev_dpdk_alloc_txq' but is used inside
>   that function to initialize 'flush_tx'.
>
>Testing shows no performance difference with and without queueing.
>Let's remove queueing entirely, because it doesn't work properly now
>and also does not improve performance.
>
>Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
>Acked-by: Daniele Di Proietto <diproiet...@vmware.com>
>---
> lib/netdev-dpdk.c | 101 ++++++++----------------------------------------------
> 1 file changed, 14 insertions(+), 87 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>index 02e2c58..8bb33d6 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -165,7 +165,6 @@ static const struct rte_eth_conf port_conf = {
>     },
> };
>
>-enum { MAX_TX_QUEUE_LEN = 384 };
> enum { DPDK_RING_SIZE = 256 };
> BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
> enum { DRAIN_TSC = 200000ULL };
>@@ -282,8 +281,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
>     = OVS_LIST_INITIALIZER(&dpdk_mp_list);
>
> /* This mutex must be used by non pmd threads when allocating or freeing
>- * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
>- * use mempools, a non pmd thread should hold this mutex while calling them */
>+ * mbufs through mempools. */
> static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
>
> struct dpdk_mp {
>@@ -297,17 +295,12 @@ struct dpdk_mp {
> /* There should be one 'struct dpdk_tx_queue' created for
>  * each cpu core. */
> struct dpdk_tx_queue {
>-    bool flush_tx;                 /* Set to true to flush queue everytime */
>-                                   /* pkts are queued. */
>-    int count;
>     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
>                                     * from concurrent access. It is used only
>                                     * if the queue is shared among different
>                                     * pmd threads (see 'txq_needs_locking').
>                                     */
>     int map;                       /* Mapping of configured vhost-user queues
>                                     * to enabled by guest. */
>-    uint64_t tsc;
>-    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
> };
>
> /* dpdk has no way to remove dpdk ring ethernet devices
>@@ -720,19 +713,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
>
>     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
>     for (i = 0; i < n_txqs; i++) {
>-        int numa_id = ovs_numa_get_numa_id(i);
>-
>-        if (!dev->txq_needs_locking) {
>-            /* Each index is considered as a cpu core id, since there should
>-             * be one tx queue for each cpu core. If the corresponding core
>-             * is not on the same numa node as 'dev', flags the
>-             * 'flush_tx'. */
>-            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
>-        } else {
>-            /* Queues are shared among CPUs. Always flush */
>-            dev->tx_q[i].flush_tx = true;
>-        }
>-
>         /* Initialize map for vhost devices. */
>         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
>         rte_spinlock_init(&dev->tx_q[i].tx_lock);
>@@ -1088,16 +1068,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
> }
>
> static inline void
>-dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>+netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>+                         struct rte_mbuf **pkts, int cnt)
> {
>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>     uint32_t nb_tx = 0;
>
>-    while (nb_tx != txq->count) {
>+    while (nb_tx != cnt) {
>         uint32_t ret;
>
>-        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
>-                               txq->count - nb_tx);
>+        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);
>         if (!ret) {
>             break;
>         }
>@@ -1105,32 +1084,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>         nb_tx += ret;
>     }
>
>-    if (OVS_UNLIKELY(nb_tx != txq->count)) {
>+    if (OVS_UNLIKELY(nb_tx != cnt)) {
>         /* free buffers, which we couldn't transmit, one at a time (each
>          * packet could come from a different mempool) */
>         int i;
>
>-        for (i = nb_tx; i < txq->count; i++) {
>-            rte_pktmbuf_free(txq->burst_pkts[i]);
>+        for (i = nb_tx; i < cnt; i++) {
>+            rte_pktmbuf_free(pkts[i]);
>         }
>         rte_spinlock_lock(&dev->stats_lock);
>-        dev->stats.tx_dropped += txq->count-nb_tx;
>+        dev->stats.tx_dropped += cnt - nb_tx;
>         rte_spinlock_unlock(&dev->stats_lock);
>     }
>-
>-    txq->count = 0;
>-    txq->tsc = rte_get_timer_cycles();
>-}
>-
>-static inline void
>-dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
>-{
>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>-
>-    if (txq->count == 0) {
>-        return;
>-    }
>-    dpdk_queue_flush__(dev, qid);
> }
>
> static inline bool
>@@ -1303,15 +1268,6 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
>     int nb_rx;
>     int dropped = 0;
>
>-    /* There is only one tx queue for this core. Do not flush other
>-     * queues.
>-     * Do not flush tx queue which is shared among CPUs
>-     * since it is always flushed */
>-    if (rxq->queue_id == rte_lcore_id() &&
>-        OVS_LIKELY(!dev->txq_needs_locking)) {
>-        dpdk_queue_flush(dev, rxq->queue_id);
>-    }
>-
>     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
>                              (struct rte_mbuf **) packets,
>                              NETDEV_MAX_BURST);
>@@ -1433,35 +1389,6 @@ out:
>     }
> }
>
>-inline static void
>-dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>-                struct rte_mbuf **pkts, int cnt)
>-{
>-    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>-    uint64_t diff_tsc;
>-
>-    int i = 0;
>-
>-    while (i < cnt) {
>-        int freeslots = MAX_TX_QUEUE_LEN - txq->count;
>-        int tocopy = MIN(freeslots, cnt-i);
>-
>-        memcpy(&txq->burst_pkts[txq->count], &pkts[i],
>-               tocopy * sizeof (struct rte_mbuf *));
>-
>-        txq->count += tocopy;
>-        i += tocopy;
>-
>-        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
>-            dpdk_queue_flush__(dev, qid);
>-        }
>-        diff_tsc = rte_get_timer_cycles() - txq->tsc;
>-        if (diff_tsc >= DRAIN_TSC) {
>-            dpdk_queue_flush__(dev, qid);
>-        }
>-    }
>-}
>-
> /* Tx function. Transmit packets indefinitely */
> static void
> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>@@ -1527,8 +1454,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
>
>         dropped += qos_pkts - newcnt;
>-        dpdk_queue_pkts(dev, qid, mbufs, newcnt);
>-        dpdk_queue_flush(dev, qid);
>+        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
>     }
>
>     if (OVS_UNLIKELY(dropped)) {
>@@ -1611,7 +1537,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>                 temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
>                                                  temp_cnt);
>                 dropped += qos_pkts - temp_cnt;
>-                dpdk_queue_pkts(dev, qid,
>+                netdev_dpdk_eth_tx_burst(dev, qid,
>                                 (struct rte_mbuf **)&pkts[next_tx_idx],
>                                 temp_cnt);
>
>@@ -1631,8 +1557,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>
>         cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
>         dropped += qos_pkts - cnt;
>-        dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
>-                        cnt);
>+        netdev_dpdk_eth_tx_burst(dev, qid,
>+                                 (struct rte_mbuf **)&pkts[next_tx_idx],
>+                                 cnt);
>     }
>
>     if (OVS_UNLIKELY(dropped)) {
>--
>2.7.4
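For readers skimming the archive: the new netdev_dpdk_eth_tx_burst() above is the usual DPDK pattern of retrying rte_eth_tx_burst() on a partial send and then dropping whatever the NIC never accepted. The sketch below shows just that pattern in isolation, assuming a recent DPDK release (uint16_t port IDs) and an already configured, started TX queue; the helper name flush_burst and the caller-supplied 'dropped' counter are illustrative only, and it omits the QoS pass, stats spinlock, and shared-queue tx_lock handling present in the actual OVS code.

#include <stdint.h>

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative helper (not OVS code): transmit 'cnt' mbufs on the given
 * port/queue, retrying while the NIC accepts at least one packet, then
 * free and count anything left over.  Returns the number transmitted. */
static uint16_t
flush_burst(uint16_t port_id, uint16_t queue_id,
            struct rte_mbuf **pkts, uint16_t cnt, uint64_t *dropped)
{
    uint16_t nb_tx = 0;

    while (nb_tx != cnt) {
        /* rte_eth_tx_burst() may take only part of the burst when the TX
         * descriptor ring is full, so retry with the remainder. */
        uint16_t ret = rte_eth_tx_burst(port_id, queue_id,
                                        pkts + nb_tx, cnt - nb_tx);
        if (!ret) {
            break;              /* Ring still full: give up on the rest. */
        }
        nb_tx += ret;
    }

    /* Free unsent packets one at a time, since each mbuf may come from a
     * different mempool, and account for them as drops. */
    for (uint16_t i = nb_tx; i < cnt; i++) {
        rte_pktmbuf_free(pkts[i]);
    }
    *dropped += cnt - nb_tx;

    return nb_tx;
}

A caller would build a burst of mbufs for a started port and invoke, e.g., flush_burst(port_id, 0, mbufs, n, &dropped); the return value says how many packets actually reached the NIC.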