On Mon, 19 Oct 2015 13:19:50 +0000
"Xie, Huawei" <huawei.xie at intel.com> wrote:

> >  static int
> > -virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
> > +virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
> > +                  int use_indirect)
> >  {
> >     struct vq_desc_extra *dxp;
> >     struct vring_desc *start_dp;
> >     uint16_t seg_num = cookie->nb_segs;
> > -   uint16_t needed = 1 + seg_num;
> > +   uint16_t needed = use_indirect ? 1 : 1 + seg_num;
> >     uint16_t head_idx, idx;
> > -   uint16_t head_size = txvq->hw->vtnet_hdr_size;
> > +   unsigned long offs;
> >  
> >     if (unlikely(txvq->vq_free_cnt == 0))
> >             return -ENOSPC;
> > @@ -220,12 +221,29 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
> >     dxp = &txvq->vq_descx[idx];
> >     dxp->cookie = (void *)cookie;
> >     dxp->ndescs = needed;
> > -
> >     start_dp = txvq->vq_ring.desc;
> > -   start_dp[idx].addr =
> > -           txvq->virtio_net_hdr_mem + idx * head_size;
> > -   start_dp[idx].len = (uint32_t)head_size;
> > -   start_dp[idx].flags = VRING_DESC_F_NEXT;
> > +
> > +   if (use_indirect) {
> > +           struct virtio_tx_region *txr
> > +                   = txvq->virtio_net_hdr_mz->addr;
> > +
> > +           offs = idx * sizeof(struct virtio_tx_region)
> > +                   + offsetof(struct virtio_tx_region, tx_indir);
> > +
> > +           start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
> > +           start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
> a. In indirect mode, since we always use one descriptor, could we
> allocate one fixed descriptor, as I did in the RX/TX ring layout
> optimization? :)

The code cannot assume all packets will be in indirect mode. If
any_layout is negotiated, some packets will use that instead. Also, if
a packet arrives with a very large nb_segs, it falls back to the old
mode. And some hosts (like vhost) don't support indirect at all.
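
For illustration, a minimal sketch of the per-packet descriptor cost
this implies (a sketch only, not a hunk from the patch; it reuses the
patch's names and assumes the DPDK mbuf header):

    #include <rte_mbuf.h>

    /* With an indirect descriptor, a packet always occupies exactly one
     * slot in the main ring, however many segments it carries; without
     * it, the header takes one descriptor plus one more per segment. */
    static inline uint16_t
    tx_slots_needed(const struct rte_mbuf *txm, int use_indirect)
    {
            return use_indirect ? 1 : 1 + txm->nb_segs;
    }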

> b. If not a, we could cache the descriptor and avoid updating it
> unless the fields differ. In the current implementation of the free
> desc list, we could make the same ring slot always use the same tx
> desc. I am about to submit a patch for the normal rx path.

See above

> c. Could we initialize the length of all tx descriptors to
> VIRTIO_MAX_INDIRECT * sizeof(struct vring_desc)? Is the maximum
> length OK here, or does the spec require that the length field
> reflect the number of descriptors actually used, given that the next
> field already indicates the last descriptor?

The largest possible VIRTIO_MAX_INDIRECT is very large: 4K.
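
For context: a device implementation typically derives the size of an
indirect table from the len of the descriptor that refers to it, which
is why the patch writes the real count rather than a fixed maximum. A
sketch of the host-side view (my illustration, using the standard
vring types):

    #include <linux/virtio_ring.h>

    /* The number of entries in an indirect table comes from the
     * referring descriptor's len, so len has to match what was really
     * written -- (seg_num + 1) in this patch. */
    static inline unsigned int
    indirect_table_entries(const struct vring_desc *desc)
    {
            return desc->len / sizeof(struct vring_desc);
    }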


> 
> > +           start_dp[idx].flags = VRING_DESC_F_INDIRECT;
> > +
> > +           start_dp = txr[idx].tx_indir;
> > +           idx = 0;
> > +   } else {
> > +           offs = idx * sizeof(struct virtio_tx_region)
> > +                   + offsetof(struct virtio_tx_region, tx_hdr);
> > +
> > +           start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
> > +           start_dp[idx].len   = txvq->hw->vtnet_hdr_size;
> > +           start_dp[idx].flags = VRING_DESC_F_NEXT;
> > +   }
> >  
> >     for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
> >             idx = start_dp[idx].next;  
> This looks weird to me. Why skip the first user-provided descriptor?
> idx = 0
> idx = start_dp[idx].next
> start_dp[idx].addr = ...

The first descriptor (0) is initialized once to point to the static
all-zeros tx header. The code then skips to the second entry to
initialize the first data block.
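
For illustration, a rough sketch of that one-time setup (my sketch,
not a hunk from this patch; it assumes the virtqueue fields and the
virtio_tx_region layout quoted further down):

    /* Point entry 0 of each per-slot indirect table at that slot's tx
     * header once, at queue start; the enqueue path then only fills
     * entries 1..nb_segs with the data buffers. */
    struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
    uint16_t i;

    for (i = 0; i < txvq->vq_nentries; i++) {
            txr[i].tx_indir[0].addr  = txvq->virtio_net_hdr_mem
                    + i * sizeof(*txr)
                    + offsetof(struct virtio_tx_region, tx_hdr);
            txr[i].tx_indir[0].len   = txvq->hw->vtnet_hdr_size;
            txr[i].tx_indir[0].flags = VRING_DESC_F_NEXT;
    }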

> 
> > @@ -236,7 +254,12 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
> >     }
> >  
> >     start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
> > -   idx = start_dp[idx].next;
> > +
> > +   if (use_indirect)
> > +           idx = txvq->vq_ring.desc[head_idx].next;
> > +   else
> > +           idx = start_dp[idx].next;
> > +
> >     txvq->vq_desc_head_idx = idx;
> >     if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
> >             txvq->vq_desc_tail_idx = idx;
> > @@ -261,7 +284,7 @@ static void
> >  virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
> >  {
> >     struct rte_mbuf *m;
> > -   int i, nbufs, error, size = vq->vq_nentries;
> > +   int nbufs, error, size = vq->vq_nentries;
> >     struct vring *vr = &vq->vq_ring;
> >     uint8_t *ring_mem = vq->vq_ring_virt_mem;
> >  
> > @@ -279,10 +302,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
> >     vq->vq_free_cnt = vq->vq_nentries;
> >     memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
> >  
> > -   /* Chain all the descriptors in the ring with an END */
> > -   for (i = 0; i < size - 1; i++)
> > -           vr->desc[i].next = (uint16_t)(i + 1);
> > -   vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
> > +   vring_desc_init(vr->desc, size);
> >  
> >     /*
> >      * Disable device(host) interrupting guest
> > @@ -760,7 +780,15 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> >  
> >     for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
> >             struct rte_mbuf *txm = tx_pkts[nb_tx];
> > -           int need = txm->nb_segs - txvq->vq_free_cnt + 1;
> > +           int use_indirect, slots, need;
> > +
> > +           use_indirect = vtpci_with_feature(txvq->hw,
> > +                                             VIRTIO_RING_F_INDIRECT_DESC)
> > +                   && (txm->nb_segs < VIRTIO_MAX_TX_INDIRECT);
> > +
> > +           /* How many ring entries are needed to this Tx? */  
> "how many descs" is more accurate , at least to me, ring entries/slots
> means entries/slots in avail ring.
> If it is OK, s/slots/descs/ as well.

The virtio spec doesn't use the word "descriptors"; that is more Intel
driver terminology.

> 
> > +           slots = use_indirect ? 1 : 1 + txm->nb_segs;
> > +           need = slots - txvq->vq_free_cnt;
> >  
> >             /* Positive value indicates it need free vring descriptors */
> >             if (need > 0) {
> > @@ -769,7 +797,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> >                     need = RTE_MIN(need, (int)nb_used);
> >  
> >                     virtio_xmit_cleanup(txvq, need);
> > -                   need = txm->nb_segs - txvq->vq_free_cnt + 1;
> > +                   need = slots - txvq->vq_free_cnt;
> >                     if (unlikely(need > 0)) {
> >                             PMD_TX_LOG(ERR,
> >                                        "No free tx descriptors to transmit");
> > @@ -787,7 +815,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> >             }
> >  
> >             /* Enqueue Packet buffers */
> > -           error = virtqueue_enqueue_xmit(txvq, txm);
> > +           error = virtqueue_enqueue_xmit(txvq, txm, use_indirect);
> >             if (unlikely(error)) {
> >                     if (error == ENOSPC)
> >                             PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
> > diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
> > index 7789411..fe3fa66 100644
> > --- a/drivers/net/virtio/virtqueue.h
> > +++ b/drivers/net/virtio/virtqueue.h
> > @@ -237,6 +237,25 @@ struct virtio_net_hdr_mrg_rxbuf {
> >     uint16_t num_buffers; /**< Number of merged rx buffers */
> >  };
> >  
> > +/* Region reserved to allow for transmit header and indirect ring */
> > +#define VIRTIO_MAX_TX_INDIRECT 8
> > +struct virtio_tx_region {
> > +   struct virtio_net_hdr_mrg_rxbuf tx_hdr;  
> Any reason to use the mergeable header here?
> > +   struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
> > +                      __attribute__((__aligned__(16)));  
> WARNING: __aligned(size) is preferred over __attribute__((aligned(size)))
> [...]
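
For reference, a sketch of the per-slot offset math this struct implies
(my illustration; it mirrors the offs computations in the enqueue path
above):

    #include <stddef.h>
    #include <stdint.h>

    /* Each ring slot idx owns one virtio_tx_region in the header
     * memzone, so its tx header and its indirect table are both found
     * with constant offset arithmetic -- no per-packet allocation. */
    static inline uint64_t
    tx_region_hdr_offset(uint16_t idx)
    {
            return idx * sizeof(struct virtio_tx_region)
                    + offsetof(struct virtio_tx_region, tx_hdr);
    }

    static inline uint64_t
    tx_region_indir_offset(uint16_t idx)
    {
            return idx * sizeof(struct virtio_tx_region)
                    + offsetof(struct virtio_tx_region, tx_indir);
    }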
