On Mon, 19 Oct 2015 13:19:50 +0000 "Xie, Huawei" <huawei.xie at intel.com> wrote:
> > static int > > -virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) > > +virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, > > + int use_indirect) > > { > > struct vq_desc_extra *dxp; > > struct vring_desc *start_dp; > > uint16_t seg_num = cookie->nb_segs; > > - uint16_t needed = 1 + seg_num; > > + uint16_t needed = use_indirect ? 1 : 1 + seg_num; > > uint16_t head_idx, idx; > > - uint16_t head_size = txvq->hw->vtnet_hdr_size; > > + unsigned long offs; > > > > if (unlikely(txvq->vq_free_cnt == 0)) > > return -ENOSPC; > > @@ -220,12 +221,29 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct > > rte_mbuf *cookie) > > dxp = &txvq->vq_descx[idx]; > > dxp->cookie = (void *)cookie; > > dxp->ndescs = needed; > > - > > start_dp = txvq->vq_ring.desc; > > - start_dp[idx].addr = > > - txvq->virtio_net_hdr_mem + idx * head_size; > > - start_dp[idx].len = (uint32_t)head_size; > > - start_dp[idx].flags = VRING_DESC_F_NEXT; > > + > > + if (use_indirect) { > > + struct virtio_tx_region *txr > > + = txvq->virtio_net_hdr_mz->addr; > > + > > + offs = idx * sizeof(struct virtio_tx_region) > > + + offsetof(struct virtio_tx_region, tx_indir); > > + > > + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; > > + start_dp[idx].len = (seg_num + 1) * sizeof(struct > > vring_desc); > a. In indirect mode, as we always use one descriptor, could we allocate > one fixed descriptor as what i did in RX/TX ring layout optimization? :). The code can not assume all packets will be in indirect mode. If using any_layout, then some packets will use that. Also if you give a packet where nb_segs is very large, then it falls back to old mode. Also some hosts (like vhost) don't support indirect. > b. If not a, we could cache the descriptor, avoid update unless the > fields are different. In current implementation of free desc list, we > could make them always use the same tx desc for the same ring slot. I am > to submit a patch for normal rx path. 
See above > c. Could we initialize the length of all tx descriptors to be > VIRTIO_MAX_INDIRECT * sizeof(struct vring_desc)? Is maximum length ok > here? Does the spec require that the length field reflects the length of > real used descs, as we already have the next field to indicate the last > descriptor. The largest VIRTIO_MAX_INDIRECT possible is very large (4K) > > > + start_dp[idx].flags = VRING_DESC_F_INDIRECT; > > + > > + start_dp = txr[idx].tx_indir; > > + idx = 0; > > + } else { > > + offs = idx * sizeof(struct virtio_tx_region) > > + + offsetof(struct virtio_tx_region, tx_hdr); > > + > > + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; > > + start_dp[idx].len = txvq->hw->vtnet_hdr_size; > > + start_dp[idx].flags = VRING_DESC_F_NEXT; > > + } > > > > for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) { > > idx = start_dp[idx].next; > This looks weird to me. Why skip the first user provided descriptor? > idx = 0 > idx = start_dp[idx].next > start_dp[idx].addr = ... The first descriptor (0) is initialized once to point to the static all zeros tx header. Then code skips to the second entry to initialize the first data block. 
> > > @@ -236,7 +254,12 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct > > rte_mbuf *cookie) > > } > > > > start_dp[idx].flags &= ~VRING_DESC_F_NEXT; > > - idx = start_dp[idx].next; > > + > > + if (use_indirect) > > + idx = txvq->vq_ring.desc[head_idx].next; > > + else > > + idx = start_dp[idx].next; > > + > > txvq->vq_desc_head_idx = idx; > > if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) > > txvq->vq_desc_tail_idx = idx; > > @@ -261,7 +284,7 @@ static void > > virtio_dev_vring_start(struct virtqueue *vq, int queue_type) > > { > > struct rte_mbuf *m; > > - int i, nbufs, error, size = vq->vq_nentries; > > + int nbufs, error, size = vq->vq_nentries; > > struct vring *vr = &vq->vq_ring; > > uint8_t *ring_mem = vq->vq_ring_virt_mem; > > > > @@ -279,10 +302,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int > > queue_type) > > vq->vq_free_cnt = vq->vq_nentries; > > memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); > > > > - /* Chain all the descriptors in the ring with an END */ > > - for (i = 0; i < size - 1; i++) > > - vr->desc[i].next = (uint16_t)(i + 1); > > - vr->desc[i].next = VQ_RING_DESC_CHAIN_END; > > + vring_desc_init(vr->desc, size); > > > > /* > > * Disable device(host) interrupting guest > > @@ -760,7 +780,15 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf > > **tx_pkts, uint16_t nb_pkts) > > > > for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { > > struct rte_mbuf *txm = tx_pkts[nb_tx]; > > - int need = txm->nb_segs - txvq->vq_free_cnt + 1; > > + int use_indirect, slots, need; > > + > > + use_indirect = vtpci_with_feature(txvq->hw, > > + VIRTIO_RING_F_INDIRECT_DESC) > > + && (txm->nb_segs < VIRTIO_MAX_TX_INDIRECT); > > + > > + /* How many ring entries are needed to this Tx? */ > "how many descs" is more accurate , at least to me, ring entries/slots > means entries/slots in avail ring. > If it is OK, s/slots/descs/ as well. The virtio spec doesn't use the word "descriptors"; that is more Intel driver terminology. 
> > > + slots = use_indirect ? 1 : 1 + txm->nb_segs; > > + need = slots - txvq->vq_free_cnt; > > > > /* Positive value indicates it need free vring descriptors */ > > if (need > 0) { > > @@ -769,7 +797,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf > > **tx_pkts, uint16_t nb_pkts) > > need = RTE_MIN(need, (int)nb_used); > > > > virtio_xmit_cleanup(txvq, need); > > - need = txm->nb_segs - txvq->vq_free_cnt + 1; > > + need = slots - txvq->vq_free_cnt; > > if (unlikely(need > 0)) { > > PMD_TX_LOG(ERR, > > "No free tx descriptors to > > transmit"); > > @@ -787,7 +815,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf > > **tx_pkts, uint16_t nb_pkts) > > } > > > > /* Enqueue Packet buffers */ > > - error = virtqueue_enqueue_xmit(txvq, txm); > > + error = virtqueue_enqueue_xmit(txvq, txm, use_indirect); > > if (unlikely(error)) { > > if (error == ENOSPC) > > PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = > > 0"); > > diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h > > index 7789411..fe3fa66 100644 > > --- a/drivers/net/virtio/virtqueue.h > > +++ b/drivers/net/virtio/virtqueue.h > > @@ -237,6 +237,25 @@ struct virtio_net_hdr_mrg_rxbuf { > > uint16_t num_buffers; /**< Number of merged rx buffers */ > > }; > > > > +/* Region reserved to allow for transmit header and indirect ring */ > > +#define VIRTIO_MAX_TX_INDIRECT 8 > > +struct virtio_tx_region { > > + struct virtio_net_hdr_mrg_rxbuf tx_hdr; > Any reason to use merge-able header here? > > + struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] > > + __attribute__((__aligned__(16))); > WARNING: __aligned(size) is preferred over __attribute__((aligned(size))) > [...]