Split the vhost enqueue and dequeue shadow used ring update functions. Enqueue shadow used descriptor updates are now tracked per cache line: updates are buffered until they fill one cache line and only then flushed to the used ring.
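
For context, a minimal sketch of the cache-line arithmetic this relies on. PACKED_DESC_PER_CACHELINE is used in the diff below but not defined in this patch; the snippet assumes a 64-byte cache line and the 16-byte virtio 1.1 packed descriptor layout, which gives four descriptors per cache line and matches the 0x3 mask applied to last_used_idx when the shadow counter is reset. The struct and macro names here are illustrative only, not part of the patch.

	/* Illustration only (not part of this patch): a virtio 1.1 packed
	 * ring descriptor is 16 bytes, so a 64-byte cache line holds four.
	 */
	#include <stdint.h>

	struct example_packed_desc {
		uint64_t addr;	/* guest buffer address */
		uint32_t len;	/* buffer length */
		uint16_t id;	/* buffer id */
		uint16_t flags;	/* AVAIL/USED/WRITE bits */
	};

	#define EXAMPLE_CACHE_LINE_SIZE	64
	#define EXAMPLE_DESC_PER_CACHELINE \
		(EXAMPLE_CACHE_LINE_SIZE / sizeof(struct example_packed_desc))

	/* enqueue_shadow_count starts at the descriptor's offset inside its
	 * cache line (last_used_idx & (EXAMPLE_DESC_PER_CACHELINE - 1), i.e.
	 * & 0x3) and grows by each buffered update's descriptor count; once
	 * it covers a full cache line the shadow ring is flushed.
	 */

The point of batching by cache line is to write each used-ring cache line once rather than dribbling out partial-line updates that bounce the same line between the vhost and guest driver cores.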
Signed-off-by: Marvin Liu <yong....@intel.com>

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 329a7658b..b8198747e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -145,6 +145,7 @@ struct vhost_virtqueue {
 		struct vring_used_elem_packed *shadow_used_packed;
 	};
 	uint16_t		shadow_used_idx;
+	uint16_t		enqueue_shadow_count;
 	struct vhost_vring_addr	ring_addrs;
 
 	struct batch_copy_elem	*batch_copy_elems;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index a62e0feda..96f7a8bec 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -158,6 +158,90 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
 	vhost_log_cache_sync(dev, vq);
 }
 
+static __rte_always_inline void
+flush_enqueue_used_packed(struct virtio_net *dev,
+			struct vhost_virtqueue *vq)
+{
+	int i;
+	uint16_t used_idx = vq->last_used_idx;
+	uint16_t head_idx = vq->last_used_idx;
+	uint16_t head_flags = 0;
+
+	/* Split loop in two to save memory barriers */
+	for (i = 0; i < vq->shadow_used_idx; i++) {
+		vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
+		vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
+
+		used_idx += vq->shadow_used_packed[i].count;
+		if (used_idx >= vq->size)
+			used_idx -= vq->size;
+	}
+
+	rte_smp_wmb();
+
+	for (i = 0; i < vq->shadow_used_idx; i++) {
+		uint16_t flags;
+
+		if (vq->shadow_used_packed[i].len)
+			flags = VRING_DESC_F_WRITE;
+		else
+			flags = 0;
+
+		if (vq->used_wrap_counter) {
+			flags |= VRING_DESC_F_USED;
+			flags |= VRING_DESC_F_AVAIL;
+		} else {
+			flags &= ~VRING_DESC_F_USED;
+			flags &= ~VRING_DESC_F_AVAIL;
+		}
+
+		if (i > 0) {
+			vq->desc_packed[vq->last_used_idx].flags = flags;
+
+			vhost_log_cache_used_vring(dev, vq,
+					vq->last_used_idx *
+					sizeof(struct vring_packed_desc),
+					sizeof(struct vring_packed_desc));
+		} else {
+			head_idx = vq->last_used_idx;
+			head_flags = flags;
+		}
+
+		vq->last_used_idx += vq->shadow_used_packed[i].count;
+		if (vq->last_used_idx >= vq->size) {
+			vq->used_wrap_counter ^= 1;
+			vq->last_used_idx -= vq->size;
+		}
+	}
+
+	vq->desc_packed[head_idx].flags = head_flags;
+
+	vhost_log_cache_used_vring(dev, vq,
+				head_idx *
+				sizeof(struct vring_packed_desc),
+				sizeof(struct vring_packed_desc));
+
+	vq->shadow_used_idx = 0;
+	vhost_log_cache_sync(dev, vq);
+}
+
+static __rte_always_inline void
+update_enqueue_shadow_used_ring_packed(struct vhost_virtqueue *vq,
+	uint16_t desc_idx, uint32_t len,
+	uint16_t count)
+{
+	if (!vq->shadow_used_idx)
+		vq->enqueue_shadow_count = vq->last_used_idx & 0x3;
+
+	uint16_t i = vq->shadow_used_idx++;
+
+	vq->shadow_used_packed[i].id = desc_idx;
+	vq->shadow_used_packed[i].len = len;
+	vq->shadow_used_packed[i].count = count;
+
+	vq->enqueue_shadow_count += count;
+}
+
 static __rte_always_inline void
 update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
 			 uint16_t desc_idx, uint32_t len, uint16_t count)
@@ -198,6 +282,24 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
 	vq->batch_copy_nb_elems = 0;
 }
 
+static __rte_always_inline void
+flush_enqueue_shadow_used_packed(struct virtio_net *dev,
+	struct vhost_virtqueue *vq, uint32_t len[],
+	uint16_t id[], uint16_t count[], uint16_t num_buffers)
+{
+	int i;
+	for (i = 0; i < num_buffers; i++) {
+		update_enqueue_shadow_used_ring_packed(vq, id[i], len[i],
+			count[i]);
+
+		if (vq->enqueue_shadow_count >= PACKED_DESC_PER_CACHELINE) {
+			do_data_copy_enqueue(dev, vq);
+			flush_enqueue_used_packed(dev, vq);
+		}
+	}
+}
+
+
 /* avoid write operation when necessary, to lessen cache issues */
 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
 	if ((var) != (val))			\
@@ -800,6 +902,9 @@ vhost_enqueue_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		max_tries = 1;
 
 	uint16_t num_buffers = 0;
+	uint32_t buffer_len[max_tries];
+	uint16_t buffer_buf_id[max_tries];
+	uint16_t buffer_desc_count[max_tries];
 
 	while (size > 0) {
 		/*
@@ -822,6 +927,10 @@ vhost_enqueue_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 		size -= len;
 
+		buffer_len[num_buffers] = len;
+		buffer_buf_id[num_buffers] = buf_id;
+		buffer_desc_count[num_buffers] = desc_count;
+
 		avail_idx += desc_count;
 		if (avail_idx >= vq->size)
 			avail_idx -= vq->size;
@@ -836,6 +945,9 @@ vhost_enqueue_normal_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		return 0;
 	}
 
+	flush_enqueue_shadow_used_packed(dev, vq, buffer_len, buffer_buf_id,
+					buffer_desc_count, num_buffers);
+
 	return 0;
 }
-- 
2.17.1