On 02.06.2016 04:33, Daniele Di Proietto wrote:
> I've wanted to do this for a long time now, thanks for posting this patch.
> 
> I didn't notice a drop in throughput with this patch, for phy-ovs-phy
> tests, even when we call rte_eth_tx_burst() for every single packet.

That's good news.

> How about 'dpdk_tx_burst()' instead of 'netdev_dpdk_eth_instant_send()'?
> The "instant" part makes sense compared to the current code, but that
> code is removed.

Yes, I also wanted to change it. Maybe 'netdev_dpdk_eth_tx_burst()'? That
is closer to the coding style and also points out that the function works
with 'eth' devices. But I think you may choose any name you like and just
replace it while pushing.

> Acked-by: Daniele Di Proietto <diproiet...@vmware.com>
> 
> If there are no objections I can push this separately from the rest of
> the series.

OK. Thanks.
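
For reference, without the intermediate queue the whole send path boils
down to roughly the sketch below (illustrative, simplified names, not the
exact OVS code; it just retries rte_eth_tx_burst() on partial sends and
frees whatever the NIC did not accept):

    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    /* Hand a whole burst to the NIC immediately, retrying on partial
     * sends and freeing the mbufs the NIC did not accept.  Returns the
     * number of dropped packets. */
    static inline uint16_t
    eth_tx_burst_all(uint16_t port_id, uint16_t qid,
                     struct rte_mbuf **pkts, uint16_t cnt)
    {
        uint16_t nb_tx = 0;
        uint16_t i;

        while (nb_tx != cnt) {
            uint16_t ret = rte_eth_tx_burst(port_id, qid, pkts + nb_tx,
                                            cnt - nb_tx);
            if (!ret) {
                break;    /* TX descriptor ring is full, drop the rest. */
            }
            nb_tx += ret;
        }

        /* Free untransmitted packets one at a time, since each mbuf may
         * come from a different mempool. */
        for (i = nb_tx; i < cnt; i++) {
            rte_pktmbuf_free(pkts[i]);
        }

        return cnt - nb_tx;
    }

The caller only has to add the returned count to 'tx_dropped' under the
stats lock, which is what the patch below does.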
Best regards, Ilya Maximets.

> On 24/05/2016 06:34, "Ilya Maximets" <i.maxim...@samsung.com> wrote:
> 
>> Current implementation of TX packet queueing is broken in several ways:
>>
>>      * TX queue flushing implemented on receive assumes that all
>>        core IDs are sequential and start from zero. This may leave
>>        packets stuck in a queue forever and also hurts latency.
>>
>>      * For a long time the flushing logic has depended on the
>>        uninitialized 'txq_needs_locking', because it is usually
>>        calculated after 'netdev_dpdk_alloc_txq' but is used inside
>>        that function to initialize 'flush_tx'.
>>
>> According to the current flushing logic, constant flushing is required if
>> TX queues are shared among different CPUs. Further patches will implement
>> mechanisms for manipulating TX queues at runtime. In that case PMD threads
>> will not know whether the current queue is shared or not, so constant
>> flushing will be required.
>>
>> Conclusion: Let's remove queueing altogether, because it doesn't work
>> properly now and constant flushing would be required anyway.
>>
>> Testing on basic PHY-OVS-PHY and PHY-OVS-VM-OVS-PHY scenarios shows an
>> insignificant performance drop (less than 0.5 percent) compared to the
>> gains we can achieve in the future using XPS or other features.
>>
>> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
>> ---
>> lib/netdev-dpdk.c | 102 ++++++++----------------------------------------------
>> 1 file changed, 14 insertions(+), 88 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 0d1b8c9..66e33df 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -167,7 +167,6 @@ static const struct rte_eth_conf port_conf = {
>>     },
>> };
>>
>> -enum { MAX_TX_QUEUE_LEN = 384 };
>> enum { DPDK_RING_SIZE = 256 };
>> BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
>> enum { DRAIN_TSC = 200000ULL };
>> @@ -284,8 +283,7 @@ static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mutex)
>>     = OVS_LIST_INITIALIZER(&dpdk_mp_list);
>>
>> /* This mutex must be used by non pmd threads when allocating or freeing
>> - * mbufs through mempools. Since dpdk_queue_pkts() and dpdk_queue_flush() may
>> - * use mempools, a non pmd thread should hold this mutex while calling them */
>> + * mbufs through mempools. */
>> static struct ovs_mutex nonpmd_mempool_mutex = OVS_MUTEX_INITIALIZER;
>>
>> struct dpdk_mp {
>> @@ -299,17 +297,12 @@ struct dpdk_mp {
>> /* There should be one 'struct dpdk_tx_queue' created for
>>  * each cpu core. */
>> struct dpdk_tx_queue {
>> -    bool flush_tx;                 /* Set to true to flush queue everytime */
>> -                                   /* pkts are queued. */
>> -    int count;
>>     rte_spinlock_t tx_lock;        /* Protects the members and the NIC queue
>>                                     * from concurrent access.  It is used only
>>                                     * if the queue is shared among different
>>                                     * pmd threads (see 'txq_needs_locking'). */
>>     int map;                       /* Mapping of configured vhost-user queues
>>                                     * to enabled by guest. */
>> -    uint64_t tsc;
>> -    struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN];
>> };
>>
>> /* dpdk has no way to remove dpdk ring ethernet devices
>> @@ -703,19 +696,6 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
>>
>>     dev->tx_q = dpdk_rte_mzalloc(n_txqs * sizeof *dev->tx_q);
>>     for (i = 0; i < n_txqs; i++) {
>> -        int numa_id = ovs_numa_get_numa_id(i);
>> -
>> -        if (!dev->txq_needs_locking) {
>> -            /* Each index is considered as a cpu core id, since there should
>> -             * be one tx queue for each cpu core.  If the corresponding core
>> -             * is not on the same numa node as 'dev', flags the
>> -             * 'flush_tx'. */
>> -            dev->tx_q[i].flush_tx = dev->socket_id == numa_id;
>> -        } else {
>> -            /* Queues are shared among CPUs. Always flush */
>> -            dev->tx_q[i].flush_tx = true;
>> -        }
>> -
>>         /* Initialize map for vhost devices. */
>>         dev->tx_q[i].map = OVS_VHOST_QUEUE_MAP_UNKNOWN;
>>         rte_spinlock_init(&dev->tx_q[i].tx_lock);
>> @@ -1056,16 +1036,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
>> }
>>
>> static inline void
>> -dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>> +netdev_dpdk_eth_instant_send(struct netdev_dpdk *dev, int qid,
>> +                             struct rte_mbuf **pkts, int cnt)
>> {
>> -    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>>     uint32_t nb_tx = 0;
>>
>> -    while (nb_tx != txq->count) {
>> +    while (nb_tx != cnt) {
>>         uint32_t ret;
>>
>> -        ret = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts + nb_tx,
>> -                               txq->count - nb_tx);
>> +        ret = rte_eth_tx_burst(dev->port_id, qid, pkts + nb_tx, cnt - nb_tx);
>>         if (!ret) {
>>             break;
>>         }
>> @@ -1073,32 +1052,18 @@ dpdk_queue_flush__(struct netdev_dpdk *dev, int qid)
>>         nb_tx += ret;
>>     }
>>
>> -    if (OVS_UNLIKELY(nb_tx != txq->count)) {
>> +    if (OVS_UNLIKELY(nb_tx != cnt)) {
>>         /* free buffers, which we couldn't transmit, one at a time (each
>>          * packet could come from a different mempool) */
>>         int i;
>>
>> -        for (i = nb_tx; i < txq->count; i++) {
>> -            rte_pktmbuf_free(txq->burst_pkts[i]);
>> +        for (i = nb_tx; i < cnt; i++) {
>> +            rte_pktmbuf_free(pkts[i]);
>>         }
>>         rte_spinlock_lock(&dev->stats_lock);
>> -        dev->stats.tx_dropped += txq->count-nb_tx;
>> +        dev->stats.tx_dropped += cnt - nb_tx;
>>         rte_spinlock_unlock(&dev->stats_lock);
>>     }
>> -
>> -    txq->count = 0;
>> -    txq->tsc = rte_get_timer_cycles();
>> -}
>> -
>> -static inline void
>> -dpdk_queue_flush(struct netdev_dpdk *dev, int qid)
>> -{
>> -    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>> -
>> -    if (txq->count == 0) {
>> -        return;
>> -    }
>> -    dpdk_queue_flush__(dev, qid);
>> }
>>
>> static bool
>> @@ -1207,18 +1172,8 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
>>                      int *c)
>> {
>>     struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>>     int nb_rx;
>>
>> -    /* There is only one tx queue for this core.  Do not flush other
>> -     * queues.
>> -     * Do not flush tx queue which is shared among CPUs
>> -     * since it is always flushed */
>> -    if (rxq->queue_id == rte_lcore_id() &&
>> -        OVS_LIKELY(!dev->txq_needs_locking)) {
>> -        dpdk_queue_flush(dev, rxq->queue_id);
>> -    }
>> -
>>     nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
>>                              (struct rte_mbuf **) packets,
>>                              NETDEV_MAX_BURST);
>> @@ -1345,35 +1300,6 @@ out:
>>     }
>> }
>>
>> -inline static void
>> -dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>> -               struct rte_mbuf **pkts, int cnt)
>> -{
>> -    struct dpdk_tx_queue *txq = &dev->tx_q[qid];
>> -    uint64_t diff_tsc;
>> -
>> -    int i = 0;
>> -
>> -    while (i < cnt) {
>> -        int freeslots = MAX_TX_QUEUE_LEN - txq->count;
>> -        int tocopy = MIN(freeslots, cnt-i);
>> -
>> -        memcpy(&txq->burst_pkts[txq->count], &pkts[i],
>> -               tocopy * sizeof (struct rte_mbuf *));
>> -
>> -        txq->count += tocopy;
>> -        i += tocopy;
>> -
>> -        if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) {
>> -            dpdk_queue_flush__(dev, qid);
>> -        }
>> -        diff_tsc = rte_get_timer_cycles() - txq->tsc;
>> -        if (diff_tsc >= DRAIN_TSC) {
>> -            dpdk_queue_flush__(dev, qid);
>> -        }
>> -    }
>> -}
>> -
>> /* Tx function. Transmit packets indefinitely */
>> static void
>> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>> @@ -1435,8 +1361,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>>         newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
>>
>>         dropped += qos_pkts - newcnt;
>> -        dpdk_queue_pkts(dev, qid, mbufs, newcnt);
>> -        dpdk_queue_flush(dev, qid);
>> +        netdev_dpdk_eth_instant_send(dev, qid, mbufs, newcnt);
>>     }
>>
>>     if (OVS_UNLIKELY(dropped)) {
>> @@ -1508,7 +1433,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>>                     temp_cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts,
>>                                                 temp_cnt);
>>                     dropped += qos_pkts - temp_cnt;
>> -                    dpdk_queue_pkts(dev, qid,
>> +                    netdev_dpdk_eth_instant_send(dev, qid,
>>                                     (struct rte_mbuf **)&pkts[next_tx_idx],
>>                                     temp_cnt);
>>
>> @@ -1528,8 +1453,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>>
>>             cnt = netdev_dpdk_qos_run__(dev, (struct rte_mbuf**)pkts, cnt);
>>             dropped += qos_pkts - cnt;
>> -            dpdk_queue_pkts(dev, qid, (struct rte_mbuf **)&pkts[next_tx_idx],
>> -                            cnt);
>> +            netdev_dpdk_eth_instant_send(dev, qid,
>> +                                         (struct rte_mbuf **)&pkts[next_tx_idx],
>> +                                         cnt);
>>         }
>>
>>         if (OVS_UNLIKELY(dropped)) {
>> -- 
>> 2.5.0