In fast enqueue function, will first check whether descriptors are cache aligned. Fast enqueue function will check prerequisites in the beginning. Fast enqueue function do not support chained mbufs, normal function will handle that.
Signed-off-by: Marvin Liu <yong....@intel.com> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 884befa85..f24026acd 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -39,6 +39,8 @@ #define VHOST_LOG_CACHE_NR 32 +/* Used in fast packed ring functions */ +#define PACKED_DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_packed_desc)) /** * Structure contains buffer address, length and descriptor index * from vring to do scatter RX. diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 003aec1d4..b877510da 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -897,6 +897,115 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, return pkt_idx; } +static __rte_always_inline uint16_t +virtio_dev_rx_fast_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf **pkts) +{ + bool wrap_counter = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint16_t avail_idx = vq->last_avail_idx; + uint64_t desc_addr, desc_addr1, desc_addr2, desc_addr3, len, len1, + len2, len3; + struct virtio_net_hdr_mrg_rxbuf *hdr, *hdr1, *hdr2, *hdr3; + uint32_t buf_offset = dev->vhost_hlen; + + if (unlikely(avail_idx & 0x3)) + return -1; + + if (unlikely(avail_idx < (vq->size - PACKED_DESC_PER_CACHELINE))) + rte_prefetch0((void *)(uintptr_t)&descs[avail_idx + + PACKED_DESC_PER_CACHELINE]); + else + rte_prefetch0((void *)(uintptr_t)&descs[0]); + + if (unlikely((pkts[0]->next != NULL) | + (pkts[1]->next != NULL) | + (pkts[2]->next != NULL) | + (pkts[3]->next != NULL))) + return -1; + + if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)) | + unlikely(!desc_is_avail(&descs[avail_idx + 1], wrap_counter)) | + unlikely(!desc_is_avail(&descs[avail_idx + 2], wrap_counter)) | + unlikely(!desc_is_avail(&descs[avail_idx + 3], wrap_counter))) + return 1; + + rte_smp_rmb(); + + len = descs[avail_idx].len; + len1 = descs[avail_idx + 1].len; + len2 = descs[avail_idx + 2].len; + len3 = descs[avail_idx + 3].len; + + if (unlikely((pkts[0]->pkt_len > (len - buf_offset)) | + (pkts[1]->pkt_len > (len1 - buf_offset)) | + (pkts[2]->pkt_len > (len2 - buf_offset)) | + (pkts[3]->pkt_len > (len3 - buf_offset)))) + return -1; + + desc_addr = vhost_iova_to_vva(dev, vq, + descs[avail_idx].addr, + &len, + VHOST_ACCESS_RW); + + desc_addr1 = vhost_iova_to_vva(dev, vq, + descs[avail_idx + 1].addr, + &len1, + VHOST_ACCESS_RW); + + desc_addr2 = vhost_iova_to_vva(dev, vq, + descs[avail_idx + 2].addr, + &len2, + VHOST_ACCESS_RW); + + desc_addr3 = vhost_iova_to_vva(dev, vq, + descs[avail_idx + 3].addr, + &len3, + VHOST_ACCESS_RW); + + if (unlikely((len != descs[avail_idx].len) | + (len1 != descs[avail_idx + 1].len) | + (len2 != descs[avail_idx + 2].len) | + (len3 != descs[avail_idx + 3].len))) + return -1; + + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; + hdr1 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr1; + hdr2 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr2; + hdr3 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr3; + + virtio_enqueue_offload(pkts[0], &hdr->hdr); + virtio_enqueue_offload(pkts[1], &hdr1->hdr); + virtio_enqueue_offload(pkts[2], &hdr2->hdr); + virtio_enqueue_offload(pkts[3], &hdr3->hdr); + + len = pkts[0]->pkt_len + dev->vhost_hlen; + len1 = pkts[1]->pkt_len + dev->vhost_hlen; + len2 = pkts[2]->pkt_len + dev->vhost_hlen; + len3 = pkts[3]->pkt_len + dev->vhost_hlen; + + vq->last_avail_idx += PACKED_DESC_PER_CACHELINE; + if (vq->last_avail_idx >= vq->size) { + vq->last_avail_idx -= vq->size; + vq->avail_wrap_counter ^= 1; + } + + rte_memcpy((void *)(uintptr_t)(desc_addr + buf_offset), + rte_pktmbuf_mtod_offset(pkts[0], void *, 0), + pkts[0]->pkt_len); + rte_memcpy((void *)(uintptr_t)(desc_addr1 + buf_offset), + rte_pktmbuf_mtod_offset(pkts[1], void *, 0), + pkts[1]->pkt_len); + rte_memcpy((void *)(uintptr_t)(desc_addr2 + buf_offset), + rte_pktmbuf_mtod_offset(pkts[2], void *, 0), + pkts[2]->pkt_len); + rte_memcpy((void *)(uintptr_t)(desc_addr3 + buf_offset), + rte_pktmbuf_mtod_offset(pkts[3], void *, 0), + pkts[3]->pkt_len); + + return 0; +} + static __rte_always_inline uint16_t virtio_dev_rx_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *pkt) -- 2.17.1