The rarp packet broadcast flag is synchronized with rte_atomic_XX APIs which is a full barrier, DMB, on aarch64. This patch optimized it with c11 atomic one-way barrier.
Signed-off-by: Phil Yang <phil.y...@arm.com> Reviewed-by: Gavin Hu <gavin...@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> Reviewed-by: Joyce Kong <joyce.k...@arm.com> --- lib/librte_vhost/vhost.h | 2 +- lib/librte_vhost/vhost_user.c | 7 +++---- lib/librte_vhost/virtio_net.c | 16 +++++++++------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 2087d14..0e22125 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -350,7 +350,7 @@ struct virtio_net { uint32_t flags; uint16_t vhost_hlen; /* to tell if we need broadcast rarp packet */ - rte_atomic16_t broadcast_rarp; + int16_t broadcast_rarp; uint32_t nr_vring; int dequeue_zero_copy; int extbuf; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index bd1be01..857187d 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -2145,11 +2145,10 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg, * Set the flag to inject a RARP broadcast packet at * rte_vhost_dequeue_burst(). * - * rte_smp_wmb() is for making sure the mac is copied - * before the flag is set. + * __ATOMIC_RELEASE ordering is for making sure the mac is + * copied before the flag is set. */ - rte_smp_wmb(); - rte_atomic16_set(&dev->broadcast_rarp, 1); + __atomic_store_n(&dev->broadcast_rarp, 1, __ATOMIC_RELEASE); did = dev->vdpa_dev_id; vdpa_dev = rte_vdpa_get_device(did); if (vdpa_dev && vdpa_dev->ops->migration_done) diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 37c47c7..fa10deb 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -2203,6 +2203,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct virtio_net *dev; struct rte_mbuf *rarp_mbuf = NULL; struct vhost_virtqueue *vq; + int16_t success = 1; dev = get_device(vid); if (!dev) @@ -2249,16 +2250,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, * * broadcast_rarp shares a cacheline in the virtio_net structure * with some fields that are accessed during enqueue and - * rte_atomic16_cmpset() causes a write if using cmpxchg. This could - * result in false sharing between enqueue and dequeue. + * __atomic_compare_exchange_n causes a write if performed compare + * and exchange. This could result in false sharing between enqueue + * and dequeue. * * Prevent unnecessary false sharing by reading broadcast_rarp first - * and only performing cmpset if the read indicates it is likely to - * be set. + * and only performing compare and exchange if the read indicates it + * is likely to be set. */ - if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && - rte_atomic16_cmpset((volatile uint16_t *) - &dev->broadcast_rarp.cnt, 1, 0))) { + if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && + __atomic_compare_exchange_n(&dev->broadcast_rarp, + &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); if (rarp_mbuf == NULL) { -- 2.7.4