> -----Original Message-----
> From: Xie, Huawei
> Sent: Sunday, May 31, 2015 4:41 PM
> To: Ouyang, Changchun; dev at dpdk.org
> Cc: Cao, Waterman
> Subject: Re: [PATCH v2 1/5] lib_vhost: Fix enqueue/dequeue can't handle
> chained vring descriptors
>
> On 5/28/2015 11:17 PM, Ouyang, Changchun wrote:
> > Vring enqueue needs to consider two cases:
> > 1. Vring descriptors chained together: the first one holds the virtio
> >    header, the rest hold the real data. The virtio driver in Linux
> >    usually uses this scheme.
> > 2. Only one descriptor: the virtio header and the real data share one
> >    single descriptor. The virtio-net PMD uses this scheme.
> >
> > The same applies to vring dequeue: it should not assume the vring
> > descriptor is chained or not chained. Virtio in different Linux
> > versions behaves differently, e.g. Fedora 20 uses chained vring
> > descriptors, while Fedora 21 uses one single vring descriptor for tx.
> >
> > Changes in v2
> >   - drop the uncompleted packet
> >   - refine code logic
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang at intel.com>
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 65 +++++++++++++++++++++++++++++++++----------
> >  1 file changed, 50 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> > index 4809d32..06ae2df 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -59,7 +59,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> >  	uint64_t buff_addr = 0;
> >  	uint64_t buff_hdr_addr = 0;
> > -	uint32_t head[MAX_PKT_BURST], packet_len = 0;
> > +	uint32_t head[MAX_PKT_BURST];
> >  	uint32_t head_idx, packet_success = 0;
> >  	uint16_t avail_idx, res_cur_idx;
> >  	uint16_t res_base_idx, res_end_idx;
> > @@ -113,6 +113,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  	rte_prefetch0(&vq->desc[head[packet_success]]);
> >
> >  	while (res_cur_idx != res_end_idx) {
> > +		uint32_t offset = 0;
> > +		uint32_t data_len, len_to_cpy;
> > +		uint8_t hdr = 0, uncompleted_pkt = 0;
> > +
> >  		/* Get descriptor from available ring */
> >  		desc = &vq->desc[head[packet_success]];
> >
> > @@ -125,7 +129,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >
> >  		/* Copy virtio_hdr to packet and increment buffer address */
> >  		buff_hdr_addr = buff_addr;
> > -		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
> >
> >  		/*
> >  		 * If the descriptors are chained the header and data are
> > @@ -136,28 +139,55 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> >  			desc = &vq->desc[desc->next];
> >  			/* Buffer address translation. */
> >  			buff_addr = gpa_to_vva(dev, desc->addr);
> > -			desc->len = rte_pktmbuf_data_len(buff);
>
> Did we get confirmation from the virtio spec that it is OK to only update
> used->len?

The virtio spec doesn't require vhost to update desc->len.
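To make that concrete, here is a minimal sketch of the ring structures as
the virtio ring ABI lays them out (illustrative declarations, not the DPDK
ones). The guest driver writes the descriptor table and the avail ring; the
device side, i.e. vhost, only writes the used ring, so desc->len can be left
untouched on the enqueue path:

#include <stdint.h>

/* Written by the guest driver only; the device just reads these. */
struct vring_desc {
	uint64_t addr;   /* guest-physical buffer address */
	uint32_t len;    /* buffer length, set by the driver */
	uint16_t flags;  /* VRING_DESC_F_NEXT, VRING_DESC_F_WRITE, ... */
	uint16_t next;   /* index of the chained descriptor */
};

/* Written by the device (vhost); the only ring it updates. */
struct vring_used_elem {
	uint32_t id;     /* head index of the consumed descriptor chain */
	uint32_t len;    /* total bytes the device wrote into the buffers;
			  * for rx this counts the virtio-net header too,
			  * which would explain reporting
			  * offset + vq->vhost_hlen below */
};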
> >  		} else {
> >  			buff_addr += vq->vhost_hlen;
> > -			desc->len = packet_len;
> > +			hdr = 1;
> >  		}
> >
> > +		data_len = rte_pktmbuf_data_len(buff);
> > +		len_to_cpy = RTE_MIN(data_len,
> > +			hdr ? desc->len - vq->vhost_hlen : desc->len);
> > +		while (len_to_cpy > 0) {
> > +			/* Copy mbuf data to buffer */
> > +			rte_memcpy((void *)(uintptr_t)buff_addr,
> > +				(const void *)(rte_pktmbuf_mtod(buff, const char *) + offset),
> > +				len_to_cpy);
> > +			PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > +				len_to_cpy, 0);
> > +
> > +			offset += len_to_cpy;
> > +
> > +			if (offset == data_len)
> > +				break;
>
> OK, I see the scatter-gather case handling is in patch 5.
>
> > +
> > +			if (desc->flags & VRING_DESC_F_NEXT) {
> > +				desc = &vq->desc[desc->next];
> > +				buff_addr = gpa_to_vva(dev, desc->addr);
> > +				len_to_cpy = RTE_MIN(data_len - offset, desc->len);
> > +			} else {
> > +				/* Room in vring buffer is not enough */
> > +				uncompleted_pkt = 1;
> > +				break;
> > +			}
> > +		};
> > +
> >  		/* Update used ring with desc information */
> >  		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
> >  							head[packet_success];
> > -		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
> >
> > -		/* Copy mbuf data to buffer */
> > -		/* FIXME for sg mbuf and the case that desc couldn't hold the mbuf data */
> > -		rte_memcpy((void *)(uintptr_t)buff_addr,
> > -			rte_pktmbuf_mtod(buff, const void *),
> > -			rte_pktmbuf_data_len(buff));
> > -		PRINT_PACKET(dev, (uintptr_t)buff_addr,
> > -			rte_pktmbuf_data_len(buff), 0);
> > +		/* Drop the packet if it is uncompleted */
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
>
> Here things become complicated with the previous lockless reserve.

Why does it become complicated? Len = 0 means the buffer doesn't contain
any meaningful data.

> What is the consequence when the guest sees zero in used->len? At least,
> did we check this against the virtio-net implementation?
>
> > +		else
> > +			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> > +							offset + vq->vhost_hlen;
>
> Two questions here:
> 1. Why add the virtio header len?
> 2. Why not use packet_len rather than offset?
>
> >
> >  		res_cur_idx++;
> >  		packet_success++;
> >
> > +		if (unlikely(uncompleted_pkt == 1))
> > +			continue;
> > +
> >  		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
> >  			(const void *)&virtio_hdr, vq->vhost_hlen);
> >
> > @@ -589,7 +619,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  		desc = &vq->desc[head[entry_success]];
> >
> >  		/* Discard first buffer as it is the virtio header */
> > -		desc = &vq->desc[desc->next];
> > +		if (desc->flags & VRING_DESC_F_NEXT) {
> > +			desc = &vq->desc[desc->next];
> > +			vb_offset = 0;
> > +			vb_avail = desc->len;
> > +		} else {
> > +			vb_offset = vq->vhost_hlen;
> > +			vb_avail = desc->len - vb_offset;
> > +		}
> >
> >  		/* Buffer address translation. */
> >  		vb_addr = gpa_to_vva(dev, desc->addr);
> > @@ -608,8 +645,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
> >  		vq->used->ring[used_idx].id = head[entry_success];
> >  		vq->used->ring[used_idx].len = 0;
> >
> > -		vb_offset = 0;
> > -		vb_avail = desc->len;
> >  		/* Allocate an mbuf and populate the structure. */
> >  		m = rte_pktmbuf_alloc(mbuf_pool);
> >  		if (unlikely(m == NULL)) {
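As a footnote on the shared logic: the layout check that both the enqueue
and dequeue paths now apply can be sketched as one standalone helper. This
is only an illustration: first_data_desc and its parameters are made-up
names, the gpa_to_vva() address translation is left out, and struct
vring_desc is assumed to follow the ring ABI.

#include <stdint.h>

#define VRING_DESC_F_NEXT 1

struct vring_desc {
	uint64_t addr;
	uint32_t len;
	uint16_t flags;
	uint16_t next;
};

/*
 * Locate the packet payload for either guest layout.
 * Case 1 (chained, Linux virtio-net): the header sits alone in the head
 * descriptor and the payload starts at offset 0 of the next descriptor.
 * Case 2 (single, virtio-net PMD): header and payload share the head
 * descriptor, so the payload starts vhost_hlen bytes into it.
 */
static const struct vring_desc *
first_data_desc(const struct vring_desc *table,
		const struct vring_desc *head,
		uint32_t vhost_hlen,
		uint32_t *offset, uint32_t *avail)
{
	if (head->flags & VRING_DESC_F_NEXT) {
		const struct vring_desc *d = &table[head->next];
		*offset = 0;
		*avail = d->len;
		return d;
	}
	*offset = vhost_hlen;
	*avail = head->len - vhost_hlen;
	return head;
}

On dequeue this is exactly the vb_offset/vb_avail setup above; on enqueue
the same test decides whether the first copy has to shrink by vq->vhost_hlen.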