On 10/22/2015 10:26 AM, Jianfeng wrote: > -----Original Message----- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Huawei Xie > Sent: Tuesday, October 20, 2015 11:30 PM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH v3 6/7] virtio: simple tx routine > > Changes in v3: > - Remove return at the end of void function > - Remove always_inline attribute for virtio_xmit_cleanup > > bulk free of mbufs when clean used ring. > shift operation of idx could be saved if vq_free_cnt means free slots rather > than free descriptors. > > TODO: rearrange vq data structure, pack the stats var together so that we > could use one vec instruction to update all of them. > > Signed-off-by: Huawei Xie <huawei.xie at intel.com> > --- > drivers/net/virtio/virtio_ethdev.h | 3 ++ > drivers/net/virtio/virtio_rxtx_simple.c | 93 > +++++++++++++++++++++++++++++++++ > 2 files changed, 96 insertions(+) > > diff --git a/drivers/net/virtio/virtio_ethdev.h > b/drivers/net/virtio/virtio_ethdev.h > index d7797ab..ae2d47d 100644 > --- a/drivers/net/virtio/virtio_ethdev.h > +++ b/drivers/net/virtio/virtio_ethdev.h > @@ -111,6 +111,9 @@ uint16_t virtio_xmit_pkts(void *tx_queue, struct > rte_mbuf **tx_pkts, uint16_t virtio_recv_pkts_vec(void *rx_queue, struct > rte_mbuf **rx_pkts, > uint16_t nb_pkts); > > +uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf > **tx_pkts, > + uint16_t nb_pkts); > + > /* > * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us > * frames larger than 1514 bytes. 
We do not yet support software LRO diff -- > git a/drivers/net/virtio/virtio_rxtx_simple.c > b/drivers/net/virtio/virtio_rxtx_simple.c > index ef17562..a53d462 100644 > --- a/drivers/net/virtio/virtio_rxtx_simple.c > +++ b/drivers/net/virtio/virtio_rxtx_simple.c > @@ -288,6 +288,99 @@ virtio_recv_pkts_vec(void *rx_queue, struct > rte_mbuf **rx_pkts, > return nb_pkts_received; > } > > +#define VIRTIO_TX_FREE_THRESH 32 > +#define VIRTIO_TX_MAX_FREE_BUF_SZ 32 > +#define VIRTIO_TX_FREE_NR 32 > +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid > +shift */ static inline void virtio_xmit_cleanup(struct virtqueue *vq) { > + uint16_t i, desc_idx; > + int nb_free = 0; > + struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; > + > + desc_idx = (uint16_t)(vq->vq_used_cons_idx & > + ((vq->vq_nentries >> 1) - 1)); > + free[0] = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; > + nb_free = 1; > + > + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { > + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; > + if (likely(m->pool == free[0]->pool)) > + free[nb_free++] = m; > + else { > + rte_mempool_put_bulk(free[0]->pool, (void **)free, > + nb_free); > + free[0] = m; > + nb_free = 1; > + } > + } > + > + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); > + vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; > + vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); } > + > +uint16_t > +virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, > + uint16_t nb_pkts) > +{ > + struct virtqueue *txvq = tx_queue; > + uint16_t nb_used; > + uint16_t desc_idx; > + struct vring_desc *start_dp; > + uint16_t nb_tail, nb_commit; > + int i; > + uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1; > + > + nb_used = VIRTQUEUE_NUSED(txvq); > + rte_compiler_barrier(); > + > + nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), > nb_pkts);
If nb_commit is zero here, how about returning 0 immediately? > + desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max); > + start_dp = txvq->vq_ring.desc; > + nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx); > + > + if (nb_used >= VIRTIO_TX_FREE_THRESH) > + virtio_xmit_cleanup(tx_queue); Should this cleanup be done before vq_free_cnt is referenced? The cleanup may return some descriptors to vq_free_cnt, but nb_commit has already been computed from the old vq_free_cnt value by this point. > + > + if (nb_commit >= nb_tail) { > + for (i = 0; i < nb_tail; i++) > + txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; > + for (i = 0; i < nb_tail; i++) { > + start_dp[desc_idx].addr = > + RTE_MBUF_DATA_DMA_ADDR(*tx_pkts); > + start_dp[desc_idx].len = (*tx_pkts)->pkt_len; > + tx_pkts++; > + desc_idx++; > + } > + nb_commit -= nb_tail; > + desc_idx = 0; > + } > + for (i = 0; i < nb_commit; i++) > + txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; > + for (i = 0; i < nb_commit; i++) { > + start_dp[desc_idx].addr = > RTE_MBUF_DATA_DMA_ADDR(*tx_pkts); > + start_dp[desc_idx].len = (*tx_pkts)->pkt_len; > + tx_pkts++; > + desc_idx++; > + } > + > + rte_compiler_barrier(); > + > + txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); > + txvq->vq_avail_idx += nb_pkts; > + txvq->vq_ring.avail->idx = txvq->vq_avail_idx; > + txvq->packets += nb_pkts; > + > + if (likely(nb_pkts)) { > + if (unlikely(virtqueue_kick_prepare(txvq))) > + virtqueue_notify(txvq); > + } > + > + return nb_pkts; > +} > + > int __attribute__((cold)) > virtio_rxq_vec_setup(struct virtqueue *rxq) { > -- > 1.8.1.4