On 6/1/2015 4:26 PM, Ouyang, Changchun wrote:
> Vring enqueue need consider the 2 cases:
>  1. use separate descriptors to contain virtio header and actual data, e.g. 
> the first descriptor
>     is for virtio header, and then followed by descriptors for actual data.
>  2. virtio header and some data are put together in one descriptor, e.g. the 
> first descriptor contain both
>     virtio header and part of actual data, and then followed by more 
> descriptors for rest of packet data,
>     current DPDK based virtio-net pmd implementation is this case;
>
> So does vring dequeue, it should not assume vring descriptor is chained or 
> not chained, it should use
> desc->flags to check whether it is chained or not. this patch also fixes TX 
> corrupt issue in fedora 21
> which uses one single vring descriptor(header and data are in one descriptor) 
> for virtio tx process on default.
I suggest removing "Fedora 21" from the commit message; the bug is
related to the virtio-net driver rather than to any particular distribution.
> Changes in v3
>   - support scattered mbuf, check the mbuf has 'next' pointer or not and copy 
> all segments to vring buffer.
>
> Changes in v2
>   - drop the uncompleted packet
>   - refine code logic
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang at intel.com>
> ---
>  lib/librte_vhost/vhost_rxtx.c | 88 
> ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 71 insertions(+), 17 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 4809d32..5fe1b6c 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -46,7 +46,8 @@
>   * This function adds buffers to the virtio devices RX virtqueue. Buffers can
>   * be received from the physical port or from another virtio device. A packet
>   * count is returned to indicate the number of packets that are succesfully
> - * added to the RX queue. This function works when mergeable is disabled.
> + * added to the RX queue. This function works when the mbuf is scattered, but
> + * it doesn't support the mergeable feature.
>   */
>  static inline uint32_t __attribute__((always_inline))
>  virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
> @@ -59,7 +60,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>       struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
>       uint64_t buff_addr = 0;
>       uint64_t buff_hdr_addr = 0;
> -     uint32_t head[MAX_PKT_BURST], packet_len = 0;
> +     uint32_t head[MAX_PKT_BURST];
>       uint32_t head_idx, packet_success = 0;
>       uint16_t avail_idx, res_cur_idx;
>       uint16_t res_base_idx, res_end_idx;
> @@ -113,6 +114,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>       rte_prefetch0(&vq->desc[head[packet_success]]);
>  
>       while (res_cur_idx != res_end_idx) {
> +             uint32_t offset = 0, vb_offset = 0;
> +             uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
> +             uint8_t hdr = 0, uncompleted_pkt = 0;
> +
>               /* Get descriptor from available ring */
>               desc = &vq->desc[head[packet_success]];
>  
> @@ -125,7 +130,6 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>  
>               /* Copy virtio_hdr to packet and increment buffer address */
>               buff_hdr_addr = buff_addr;
> -             packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
>  
>               /*
>                * If the descriptors are chained the header and data are
> @@ -136,28 +140,73 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
>                       desc = &vq->desc[desc->next];
>                       /* Buffer address translation. */
>                       buff_addr = gpa_to_vva(dev, desc->addr);
> -                     desc->len = rte_pktmbuf_data_len(buff);
>               } else {
> -                     buff_addr += vq->vhost_hlen;
> -                     desc->len = packet_len;
> +                     vb_offset += vq->vhost_hlen;
> +                     hdr = 1;
>               }
>  
> +             pkt_len = rte_pktmbuf_pkt_len(buff);
> +             data_len = rte_pktmbuf_data_len(buff);
> +             len_to_cpy = RTE_MIN(data_len,
> +                     hdr ? desc->len - vq->vhost_hlen : desc->len);
> +             while (len_to_cpy > 0) {
`while (total_copied < pkt_len)` would be safer and more readable.
Besides, what if we encounter a descriptor with zero length? With the
`len_to_cpy > 0` condition, we would pass a partially copied mbuf to the
guest but still set used->len = packet_len.
> +                     /* Copy mbuf data to buffer */
> +                     rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
> +                             (const void *)(rte_pktmbuf_mtod(buff, const 
> char *) + offset),
> +                             len_to_cpy);
> +                     PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
> +                             len_to_cpy, 0);
> +
> +                     offset += len_to_cpy;
> +                     vb_offset += len_to_cpy;
> +                     total_copied += len_to_cpy;
> +
> +                     /* The whole packet completes */
> +                     if (total_copied == pkt_len)
> +                             break;
> +
> +                     /* The current segment completes */
> +                     if (offset == data_len) {
> +                             buff = buff->next;
> +                             if (buff != NULL) {
> +                                     offset = 0;
> +                                     data_len = rte_pktmbuf_data_len(buff);
> +                             }
What if (buff == NULL)? Either we treat the mbuf as reliable and do no
sanity checks at all, or we check thoroughly, e.g.:
if (buff != NULL) {

} else {
    ...
    break;
}
> +                     }
> +
> +                     /* The current vring descriptor done */
> +                     if (vb_offset == desc->len) {
> +                             if (desc->flags & VRING_DESC_F_NEXT) {
> +                                     desc = &vq->desc[desc->next];
> +                                     buff_addr = gpa_to_vva(dev, desc->addr);
> +                                     vb_offset = 0;
> +                             } else {
> +                                     /* Room in vring buffer is not enough */
> +                                     uncompleted_pkt = 1;
> +                                     break;
> +                             }
> +                     }
> +                     len_to_cpy = RTE_MIN(data_len - offset, desc->len - 
> vb_offset);
> +             };
> +
>               /* Update used ring with desc information */
>               vq->used->ring[res_cur_idx & (vq->size - 1)].id =
>                                                       head[packet_success];
> -             vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
>  
> -             /* Copy mbuf data to buffer */
> -             /* FIXME for sg mbuf and the case that desc couldn't hold the 
> mbuf data */
> -             rte_memcpy((void *)(uintptr_t)buff_addr,
> -                     rte_pktmbuf_mtod(buff, const void *),
> -                     rte_pktmbuf_data_len(buff));
> -             PRINT_PACKET(dev, (uintptr_t)buff_addr,
> -                     rte_pktmbuf_data_len(buff), 0);
> +             /* Drop the packet if it is uncompleted */
> +             if (unlikely(uncompleted_pkt == 1))
> +                     vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +                                                     vq->vhost_hlen;
> +             else
> +                     vq->used->ring[res_cur_idx & (vq->size - 1)].len =
> +                                                     pkt_len + 
> vq->vhost_hlen;
>  
>               res_cur_idx++;
>               packet_success++;
>  
> +             if (unlikely(uncompleted_pkt == 1))
> +                     continue;
> +
>               rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
>                       (const void *)&virtio_hdr, vq->vhost_hlen);
>  
> @@ -589,7 +638,14 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
> queue_id,
>               desc = &vq->desc[head[entry_success]];
>  
>               /* Discard first buffer as it is the virtio header */
> -             desc = &vq->desc[desc->next];
> +             if (desc->flags & VRING_DESC_F_NEXT) {
> +                     desc = &vq->desc[desc->next];
> +                     vb_offset = 0;
> +                     vb_avail = desc->len;
> +             } else {
> +                     vb_offset = vq->vhost_hlen;
> +                     vb_avail = desc->len - vb_offset;
> +             }
>  
>               /* Buffer address translation. */
>               vb_addr = gpa_to_vva(dev, desc->addr);
> @@ -608,8 +664,6 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t 
> queue_id,
>               vq->used->ring[used_idx].id = head[entry_success];
>               vq->used->ring[used_idx].len = 0;
>  
> -             vb_offset = 0;
> -             vb_avail = desc->len;
>               /* Allocate an mbuf and populate the structure. */
>               m = rte_pktmbuf_alloc(mbuf_pool);
>               if (unlikely(m == NULL)) {

Reply via email to