On 05.09.2019 19:14, Marvin Liu wrote: > Burst enqueue function will first check whether descriptors are cache > aligned. It will also check prerequisites in the beginning. The burst > enqueue function does not support chained mbufs; the single packet enqueue > function will handle them. > > Signed-off-by: Marvin Liu <yong....@intel.com>
Hi. Can we rely on loop unrolling by the compiler instead of repeating each command 4 times? For example: uint64_t len[PACKED_DESCS_BURST]; for (i = 0; i < PACKED_DESCS_BURST; i++) len[i] = descs[avail_idx + i].len; For 'if's: res = false; for (i = 0; i < PACKED_DESCS_BURST; i++) res |= pkts[i]->next != NULL; if (unlikely(res)) return -1; or just for (i = 0; i < PACKED_DESCS_BURST; i++) if (unlikely(pkts[i]->next != NULL)) return -1; Since PACKED_DESCS_BURST is a fairly small constant, the loops should be unrolled by the compiler, producing almost the same code. This will significantly reduce code size and will also allow playing with the PACKED_DESCS_BURST value without massive code changes. The same is applicable to other patches in the series. What do you think? Best regards, Ilya Maximets. > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h > index 884befa85..ed8b4aabf 100644 > --- a/lib/librte_vhost/vhost.h > +++ b/lib/librte_vhost/vhost.h > @@ -39,6 +39,8 @@ > > #define VHOST_LOG_CACHE_NR 32 > > +#define PACKED_DESCS_BURST 4 > +#define PACKED_BURST_MASK (PACKED_DESCS_BURST - 1) > /** > * Structure contains buffer address, length and descriptor index > * from vring to do scatter RX. 
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c > index 5ad0a8175..51ed20543 100644 > --- a/lib/librte_vhost/virtio_net.c > +++ b/lib/librte_vhost/virtio_net.c > @@ -896,6 +896,106 @@ virtio_dev_rx_split(struct virtio_net *dev, struct > vhost_virtqueue *vq, > return pkt_idx; > } > > +static __rte_unused uint16_t > +virtio_dev_rx_burst_packed(struct virtio_net *dev, struct vhost_virtqueue > *vq, > + struct rte_mbuf **pkts) > +{ > + bool wrap_counter = vq->avail_wrap_counter; > + struct vring_packed_desc *descs = vq->desc_packed; > + uint16_t avail_idx = vq->last_avail_idx; > + uint64_t desc_addr, desc_addr1, desc_addr2, desc_addr3; > + uint64_t len, len1, len2, len3; > + struct virtio_net_hdr_mrg_rxbuf *hdr, *hdr1, *hdr2, *hdr3; > + uint32_t buf_offset = dev->vhost_hlen; > + > + if (unlikely(avail_idx & PACKED_BURST_MASK)) > + return -1; > + > + if (unlikely((pkts[0]->next != NULL) | > + (pkts[1]->next != NULL) | > + (pkts[2]->next != NULL) | > + (pkts[3]->next != NULL))) > + return -1; > + > + if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)) | > + unlikely(!desc_is_avail(&descs[avail_idx + 1], wrap_counter)) | > + unlikely(!desc_is_avail(&descs[avail_idx + 2], wrap_counter)) | > + unlikely(!desc_is_avail(&descs[avail_idx + 3], wrap_counter))) > + return 1; > + > + rte_smp_rmb(); > + > + len = descs[avail_idx].len; > + len1 = descs[avail_idx + 1].len; > + len2 = descs[avail_idx + 2].len; > + len3 = descs[avail_idx + 3].len; > + > + if (unlikely((pkts[0]->pkt_len > (len - buf_offset)) | > + (pkts[1]->pkt_len > (len1 - buf_offset)) | > + (pkts[2]->pkt_len > (len2 - buf_offset)) | > + (pkts[3]->pkt_len > (len3 - buf_offset)))) > + return -1; > + > + desc_addr = vhost_iova_to_vva(dev, vq, descs[avail_idx].addr, &len, > + VHOST_ACCESS_RW); > + > + desc_addr1 = vhost_iova_to_vva(dev, vq, descs[avail_idx + 1].addr, > + &len1, VHOST_ACCESS_RW); > + > + desc_addr2 = vhost_iova_to_vva(dev, vq, descs[avail_idx + 2].addr, > + &len2, 
VHOST_ACCESS_RW); > + > + desc_addr3 = vhost_iova_to_vva(dev, vq, descs[avail_idx + 3].addr, > + &len3, VHOST_ACCESS_RW); > + > + if (unlikely((len != descs[avail_idx].len) | > + (len1 != descs[avail_idx + 1].len) | > + (len2 != descs[avail_idx + 2].len) | > + (len3 != descs[avail_idx + 3].len))) > + return -1; > + > + rte_prefetch0((void *)(uintptr_t)desc_addr); > + rte_prefetch0((void *)(uintptr_t)desc_addr1); > + rte_prefetch0((void *)(uintptr_t)desc_addr2); > + rte_prefetch0((void *)(uintptr_t)desc_addr3); > + > + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; > + hdr1 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr1; > + hdr2 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr2; > + hdr3 = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr3; > + > + virtio_enqueue_offload(pkts[0], &hdr->hdr); > + virtio_enqueue_offload(pkts[1], &hdr1->hdr); > + virtio_enqueue_offload(pkts[2], &hdr2->hdr); > + virtio_enqueue_offload(pkts[3], &hdr3->hdr); > + > + len = pkts[0]->pkt_len + dev->vhost_hlen; > + len1 = pkts[1]->pkt_len + dev->vhost_hlen; > + len2 = pkts[2]->pkt_len + dev->vhost_hlen; > + len3 = pkts[3]->pkt_len + dev->vhost_hlen; > + > + vq->last_avail_idx += PACKED_DESCS_BURST; > + if (vq->last_avail_idx >= vq->size) { > + vq->last_avail_idx -= vq->size; > + vq->avail_wrap_counter ^= 1; > + } > + > + rte_memcpy((void *)(uintptr_t)(desc_addr + buf_offset), > + rte_pktmbuf_mtod_offset(pkts[0], void *, 0), > + pkts[0]->pkt_len); > + rte_memcpy((void *)(uintptr_t)(desc_addr1 + buf_offset), > + rte_pktmbuf_mtod_offset(pkts[1], void *, 0), > + pkts[1]->pkt_len); > + rte_memcpy((void *)(uintptr_t)(desc_addr2 + buf_offset), > + rte_pktmbuf_mtod_offset(pkts[2], void *, 0), > + pkts[2]->pkt_len); > + rte_memcpy((void *)(uintptr_t)(desc_addr3 + buf_offset), > + rte_pktmbuf_mtod_offset(pkts[3], void *, 0), > + pkts[3]->pkt_len); > + > + return 0; > +} > + > static __rte_unused int16_t > virtio_dev_rx_single_packed(struct virtio_net *dev, 
struct vhost_virtqueue > *vq, > struct rte_mbuf *pkt) >