> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> Sent: Wednesday, October 16, 2019 6:36 PM
> To: Liu, Yong <yong....@intel.com>; Bie, Tiwei <tiwei....@intel.com>; Wang, Zhihong <zhihong.w...@intel.com>; step...@networkplumber.org; gavin...@arm.com
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v6 06/13] vhost: add packed ring batch dequeue
>
>
> On 10/15/19 6:07 PM, Marvin Liu wrote:
> > Add a batch dequeue function for packed ring, similar to the batch
> > enqueue function. The batch dequeue function will not support chained
> > descriptors; the single packet dequeue function will handle them.
> >
> > Signed-off-by: Marvin Liu <yong....@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > index 18d01cb19..96bf763b1 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -39,6 +39,9 @@
> >
> >  #define VHOST_LOG_CACHE_NR 32
> >
> > +#define PACKED_DESC_SINGLE_DEQUEUE_FLAG (VRING_DESC_F_NEXT | \
> > +                                         VRING_DESC_F_INDIRECT)
> > +
> >  #define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
> >                             sizeof(struct vring_packed_desc))
> >  #define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
> > diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> > index e1b06c1ce..274a28f99 100644
> > --- a/lib/librte_vhost/virtio_net.c
> > +++ b/lib/librte_vhost/virtio_net.c
> > @@ -1551,6 +1551,113 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
> >          return i;
> >  }
> >
> > +static __rte_always_inline int
> > +vhost_reserve_avail_batch_packed(struct virtio_net *dev,
> > +                                 struct vhost_virtqueue *vq,
> > +                                 struct rte_mempool *mbuf_pool,
> > +                                 struct rte_mbuf **pkts,
> > +                                 uint16_t avail_idx,
> > +                                 uintptr_t *desc_addrs,
> > +                                 uint16_t *ids)
> > +{
> > +        bool wrap = vq->avail_wrap_counter;
> > +        struct vring_packed_desc *descs = vq->desc_packed;
> > +        struct virtio_net_hdr *hdr;
> > +        uint64_t lens[PACKED_BATCH_SIZE];
> > +        uint64_t buf_lens[PACKED_BATCH_SIZE];
> > +        uint32_t buf_offset = dev->vhost_hlen;
> > +        uint16_t flags, i;
> > +
> > +        if (unlikely(avail_idx & PACKED_BATCH_MASK))
> > +                return -1;
> > +        if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
> > +                return -1;
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                flags = descs[avail_idx + i].flags;
> > +                if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
> > +                             (wrap == !!(flags & VRING_DESC_F_USED)) ||
> > +                             (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
> > +                        return -1;
> > +        }
> > +
> > +        rte_smp_rmb();
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
> > +                lens[i] = descs[avail_idx + i].len;
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                desc_addrs[i] = vhost_iova_to_vva(dev, vq,
> > +                                                  descs[avail_idx + i].addr,
> > +                                                  &lens[i], VHOST_ACCESS_RW);
> > +        }
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                if (unlikely((lens[i] != descs[avail_idx + i].len)))
> > +                        return -1;
> > +        }
> > +
> > +        if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, PACKED_BATCH_SIZE))
> > +                return -1;
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
> > +                buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
> > +                        goto free_buf;
> > +        }
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                pkts[i]->pkt_len = descs[avail_idx + i].len - buf_offset;
> > +                pkts[i]->data_len = pkts[i]->pkt_len;
> > +                ids[i] = descs[avail_idx + i].id;
> > +        }
> > +
> > +        if (virtio_net_with_host_offload(dev)) {
> > +                for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                        hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
> > +                        vhost_dequeue_offload(hdr, pkts[i]);
> > +                }
> > +        }
> > +
> > +        return 0;
> > +
> > +free_buf:
> > +        for (i = 0; i < PACKED_BATCH_SIZE; i++)
> > +                rte_pktmbuf_free(pkts[i]);
> > +
> > +        return -1;
> > +}
> > +
> > +static __rte_unused int
> > +virtio_dev_tx_batch_packed(struct virtio_net *dev,
> > +                           struct vhost_virtqueue *vq,
> > +                           struct rte_mempool *mbuf_pool,
> > +                           struct rte_mbuf **pkts)
> > +{
> > +        uint16_t avail_idx = vq->last_avail_idx;
> > +        uint32_t buf_offset = dev->vhost_hlen;
> > +        uintptr_t desc_addrs[PACKED_BATCH_SIZE];
> > +        uint16_t ids[PACKED_BATCH_SIZE];
> > +        uint16_t i;
> > +
> > +        if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts,
> > +                                             avail_idx, desc_addrs, ids))
> > +                return -1;
> > +
> > +        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
> > +                rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
>
> Wouldn't we have better performance with a dedicated unroll loop for the
> prefetches, so that desc_addrs[i+1] is being prefetched while the
> desc_addr[i] memcpy is done?
>

Thanks, Maxime. It will be slightly better. A dedicated unroll loop will be
added in the next version (see the sketch after this message).
Regards,
Marvin

> > +                rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
> > +                           (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
> > +                           pkts[i]->pkt_len);
> > +        }
> > +
> > +        vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
> > +
> > +        return 0;
> > +}
> > +
> >  static __rte_always_inline int
> >  vhost_dequeue_single_packed(struct virtio_net *dev,
> >                              struct vhost_virtqueue *vq,
> >
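[Editor's note] For reference, a minimal sketch of the split discussed above:
the single loop in virtio_dev_tx_batch_packed is divided into a dedicated
prefetch pass followed by the copy pass, so the prefetch of desc_addrs[i + 1]
can overlap with the copy of desc_addrs[i]. It reuses the for_each_try_unroll
macro and the local variables (desc_addrs, pkts, buf_offset, i) from the patch
above; it is a fragment illustrating the suggestion, not the code of the
follow-up version.

        /* Prefetch every descriptor buffer of the batch first... */
        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
                rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);

        /* ...then copy the payloads (past the virtio-net header) into the
         * already allocated mbufs.
         */
        for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
                           (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
                           pkts[i]->pkt_len);
        }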