On Thu, Dec 20, 2018 at 05:47:55PM +0100, Maxime Coquelin wrote: > Instead of writing back descriptor chains in order, let's > write the first chain's flags last in order to improve batching. > > Also, move the write barrier into the logging cache sync, so that it > is done only when logging is enabled. It means there is now > one more barrier for split ring when logging is enabled. > > With the kernel's pktgen benchmark, a ~3% performance gain is measured. > > Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com>
Acked-by: Michael S. Tsirkin <m...@redhat.com> > --- > lib/librte_vhost/vhost.h | 2 ++ > lib/librte_vhost/virtio_net.c | 19 ++++++++++++++++--- > 2 files changed, 18 insertions(+), 3 deletions(-) > > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h > index 3b3265c4b..7d1d8a308 100644 > --- a/lib/librte_vhost/vhost.h > +++ b/lib/librte_vhost/vhost.h > @@ -457,6 +457,8 @@ vhost_log_cache_sync(struct virtio_net *dev, struct > vhost_virtqueue *vq) > !dev->log_base)) > return; > > + rte_smp_wmb(); > + > log_base = (unsigned long *)(uintptr_t)dev->log_base; > > /* > diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c > index 8c657a101..02c1fd3a4 100644 > --- a/lib/librte_vhost/virtio_net.c > +++ b/lib/librte_vhost/virtio_net.c > @@ -97,6 +97,8 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, > { > int i; > uint16_t used_idx = vq->last_used_idx; > + uint16_t head_idx = vq->last_used_idx; > + uint16_t head_flags = 0; > > /* Split loop in two to save memory barriers */ > for (i = 0; i < vq->shadow_used_idx; i++) { > @@ -126,12 +128,17 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, > flags &= ~VRING_DESC_F_AVAIL; > } > > - vq->desc_packed[vq->last_used_idx].flags = flags; > + if (i > 0) { > + vq->desc_packed[vq->last_used_idx].flags = flags; > > - vhost_log_cache_used_vring(dev, vq, > + vhost_log_cache_used_vring(dev, vq, > vq->last_used_idx * > sizeof(struct vring_packed_desc), > sizeof(struct vring_packed_desc)); > + } else { > + head_idx = vq->last_used_idx; > + head_flags = flags; > + } > > vq->last_used_idx += vq->shadow_used_packed[i].count; > if (vq->last_used_idx >= vq->size) { > @@ -140,7 +147,13 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, > } > } > > - rte_smp_wmb(); > + vq->desc_packed[head_idx].flags = head_flags; > + > + vhost_log_cache_used_vring(dev, vq, > + head_idx * > + sizeof(struct vring_packed_desc), > + sizeof(struct vring_packed_desc)); > + > vq->shadow_used_idx = 0; > 
vhost_log_cache_sync(dev, vq); > } > -- > 2.17.2