Instead of calculating the address of a packed descriptor from
vq->desc_packed and vq->last_used_idx on every access, store the base
address once in desc_base. On Arm, this saves 176 bytes of code size
in the function into which vhost_flush_enqueue_batch_packed gets
inlined.
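
The transformation amounts to the following (a minimal sketch of the
hoisting pattern, using a plain for loop in place of the
vhost_for_each_try_unroll macro; only the .id store is shown):

    /* Before: the descriptor address is recomputed on every
     * iteration from vq->desc_packed and vq->last_used_idx.
     */
    for (i = 0; i < PACKED_BATCH_SIZE; i++)
            vq->desc_packed[vq->last_used_idx + i].id = ids[i];

    /* After: hoist the base address out of the loop and index
     * off the cached pointer instead.
     */
    struct vring_packed_desc *desc_base =
            &vq->desc_packed[vq->last_used_idx];
    for (i = 0; i < PACKED_BATCH_SIZE; i++)
            desc_base[i].id = ids[i];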

Signed-off-by: Balazs Nemeth <bnem...@redhat.com>
---
 lib/librte_vhost/virtio_net.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 179c57b46..f091384a6 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -217,6 +217,8 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
 {
        uint16_t i;
        uint16_t flags;
+       uint16_t last_used_idx = vq->last_used_idx;
+       struct vring_packed_desc *desc_base = &vq->desc_packed[last_used_idx];
 
        if (vq->shadow_used_idx) {
                do_data_copy_enqueue(dev, vq);
@@ -226,16 +228,17 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
        flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);
 
        vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
-               vq->desc_packed[vq->last_used_idx + i].id = ids[i];
-               vq->desc_packed[vq->last_used_idx + i].len = lens[i];
+               desc_base[i].id = ids[i];
+               desc_base[i].len = lens[i];
        }
 
        rte_atomic_thread_fence(__ATOMIC_RELEASE);
 
-       vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
-               vq->desc_packed[vq->last_used_idx + i].flags = flags;
+       vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+               desc_base[i].flags = flags;
+       }
 
-       vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
+       vhost_log_cache_used_vring(dev, vq, last_used_idx *
                                   sizeof(struct vring_packed_desc),
                                   sizeof(struct vring_packed_desc) *
                                   PACKED_BATCH_SIZE);
-- 
2.30.2
