This patch adds an operation callback which gets called every time the
library wants to call eventfd_write(). As eventfd_write() results in a
system call, it can potentially block the PMD thread.

The callback function can decide whether it's OK to handle the
eventfd_write() now, or to defer it and have the newly introduced
function, rte_vhost_notify_guest(), called at a later time.

This can be used by third-party applications, like OVS, to avoid
issuing system calls from the PMD threads.

Signed-off-by: Eelco Chaudron <echau...@redhat.com>
---
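Not part of the patch; a minimal, hypothetical usage sketch for reviewers.
It shows how a front-end could consume the new API: the PMD thread hands
the kick off from the guest_notify callback, and a dedicated thread later
performs it via rte_vhost_notify_guest(). Only rte_vhost_notify_guest(),
the guest_notify member and rte_vhost_driver_callback_register() come from
this series; the deferred-kick queue, the thread and all app_*/kick_* names
are illustrative, and rte_vhost_driver_register()/_start() plus error
handling are omitted.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#include <rte_vhost.h>

struct deferred_kick {
	int vid;
	uint16_t queue_id;
};

#define KICK_RING_SIZE 256	/* power of two so wraparound indexing stays correct */

static struct deferred_kick kick_ring[KICK_RING_SIZE];
static unsigned int kick_head, kick_tail;
static pthread_mutex_t kick_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Called by the vhost library instead of eventfd_write(). Returning true
 * means "the application will notify the guest later"; returning false
 * makes the library fall back to the eventfd_write() path.
 */
static bool
app_guest_notify(int vid, uint16_t queue_id)
{
	bool queued = false;

	pthread_mutex_lock(&kick_lock);
	if (kick_head - kick_tail < KICK_RING_SIZE) {
		kick_ring[kick_head++ % KICK_RING_SIZE] =
			(struct deferred_kick){ vid, queue_id };
		queued = true;
	}
	pthread_mutex_unlock(&kick_lock);

	return queued;
}

/* Dedicated (non-PMD) thread doing the actual, possibly blocking, kick. */
static void *
kick_thread(void *arg)
{
	(void)arg;

	for (;;) {	/* a real implementation would block instead of busy-polling */
		struct deferred_kick k;
		bool have = false;

		pthread_mutex_lock(&kick_lock);
		if (kick_tail != kick_head) {
			k = kick_ring[kick_tail++ % KICK_RING_SIZE];
			have = true;
		}
		pthread_mutex_unlock(&kick_lock);

		if (have)
			rte_vhost_notify_guest(k.vid, k.queue_id);
	}
	return NULL;
}

static const struct rte_vhost_device_ops app_vhost_ops = {
	/* .new_device, .destroy_device, ... as before */
	.guest_notify = app_guest_notify,
};

/* Registration itself is unchanged. */
static int
app_vhost_init(const char *path)	/* path: the vhost-user socket */
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, kick_thread, NULL) != 0)
		return -1;

	return rte_vhost_driver_callback_register(path, &app_vhost_ops);
}

An application rebuilt against this release binds
rte_vhost_driver_callback_register() to the new v24 symbol and can populate
guest_notify; binaries built against older releases keep the v23 symbol,
which forces guest_notify to NULL so a stale 'reserved' value is never
invoked.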
 lib/vhost/meson.build |    2 +
 lib/vhost/rte_vhost.h |   23 +++++++++++++++-
 lib/vhost/socket.c    |   72 ++++++++++++++++++++++++++++++++++++++++++++++---
 lib/vhost/version.map |    9 ++++++
 lib/vhost/vhost.c     |   38 ++++++++++++++++++++++++++
 lib/vhost/vhost.h     |   65 +++++++++++++++++++++++++++++++-------------
 6 files changed, 184 insertions(+), 25 deletions(-)

diff --git a/lib/vhost/meson.build b/lib/vhost/meson.build
index 197a51d936..e93ba6b078 100644
--- a/lib/vhost/meson.build
+++ b/lib/vhost/meson.build
@@ -39,3 +39,5 @@ driver_sdk_headers = files(
         'vdpa_driver.h',
 )
 deps += ['ethdev', 'cryptodev', 'hash', 'pci', 'dmadev']
+
+use_function_versioning = true
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index 58a5d4be92..7a10bc36cf 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -298,7 +298,13 @@ struct rte_vhost_device_ops {
         */
        void (*guest_notified)(int vid);
 
-       void *reserved[1]; /**< Reserved for future extension */
+       /**
+        * If this callback is registered, the guest notification can be
+        * deferred: return 'true' and have the front-end call
+        * rte_vhost_notify_guest() later, or return 'false' to let the
+        * library issue the "slow" eventfd_write() syscall itself.
+        */
+       bool (*guest_notify)(int vid, uint16_t queue_id);
 };
 
 /**
@@ -433,6 +439,21 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
 
 int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice.
+ *
+ * Inject the offloaded interrupt into the vhost device's queue. For more
+ * details see the 'guest_notify' vhost device operation.
+ *
+ * @param vid
+ *  vhost device ID
+ * @param queue_id
+ *  virtio queue index
+ */
+__rte_experimental
+void rte_vhost_notify_guest(int vid, uint16_t queue_id);
+
 /**
  * Register vhost driver. path could be different for multiple
  * instance support.
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index 669c322e12..787d6bacf8 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -15,6 +15,7 @@
 #include <fcntl.h>
 #include <pthread.h>
 
+#include <rte_function_versioning.h>
 #include <rte_log.h>
 
 #include "fd_man.h"
@@ -43,6 +44,7 @@ struct vhost_user_socket {
        bool async_copy;
        bool net_compliant_ol_flags;
        bool stats_enabled;
+       bool alloc_notify_ops;
 
        /*
         * The "supported_features" indicates the feature bits the
@@ -846,6 +848,14 @@ vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
                vsocket->path = NULL;
        }
 
+       if (vsocket && vsocket->alloc_notify_ops) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+               free((struct rte_vhost_device_ops *)vsocket->notify_ops);
+#pragma GCC diagnostic pop
+               vsocket->notify_ops = NULL;
+       }
+
        if (vsocket) {
                free(vsocket);
                vsocket = NULL;
@@ -1099,21 +1109,75 @@ rte_vhost_driver_unregister(const char *path)
 /*
  * Register ops so that we can add/remove device to data core.
  */
-int
-rte_vhost_driver_callback_register(const char *path,
-       struct rte_vhost_device_ops const * const ops)
+static int
+rte_vhost_driver_callback_register__(const char *path,
+       struct rte_vhost_device_ops const * const ops, bool ops_allocated)
 {
        struct vhost_user_socket *vsocket;
 
        pthread_mutex_lock(&vhost_user.mutex);
        vsocket = find_vhost_user_socket(path);
-       if (vsocket)
+       if (vsocket) {
+               if (vsocket->alloc_notify_ops) {
+                       vsocket->alloc_notify_ops = false;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+                       free((struct rte_vhost_device_ops *)vsocket->notify_ops);
+#pragma GCC diagnostic pop
+               }
                vsocket->notify_ops = ops;
+               if (ops_allocated)
+                       vsocket->alloc_notify_ops = true;
+       }
        pthread_mutex_unlock(&vhost_user.mutex);
 
        return vsocket ? 0 : -1;
 }
 
+int __vsym
+rte_vhost_driver_callback_register_v24(const char *path,
+       struct rte_vhost_device_ops const * const ops)
+{
+       return rte_vhost_driver_callback_register__(path, ops, false);
+}
+
+int __vsym
+rte_vhost_driver_callback_register_v23(const char *path,
+       struct rte_vhost_device_ops const * const ops)
+{
+       int ret;
+
+       /*
+        * Although the ops structure is a const structure, we do need to
+        * override the guest_notify operation. This is because with the
+        * previous APIs it was "reserved" and if any garbage value was passed,
+        * it could crash the application.
+        */
+       if (ops && ops->guest_notify) {
+               struct rte_vhost_device_ops *new_ops;
+
+               new_ops = malloc(sizeof(*new_ops));
+               if (new_ops == NULL)
+                       return -1;
+
+               memcpy(new_ops, ops, sizeof(*new_ops));
+               new_ops->guest_notify = NULL;
+
+               ret = rte_vhost_driver_callback_register__(path, new_ops, true);
+       } else {
+               ret = rte_vhost_driver_callback_register__(path, ops, false);
+       }
+
+       return ret;
+}
+
+/* Mark the v23 function as the old version, and v24 as the default version. */
+VERSION_SYMBOL(rte_vhost_driver_callback_register, _v23, 23);
+BIND_DEFAULT_SYMBOL(rte_vhost_driver_callback_register, _v24, 24);
+MAP_STATIC_SYMBOL(int rte_vhost_driver_callback_register(const char *path,
+               struct rte_vhost_device_ops const * const ops),
+               rte_vhost_driver_callback_register_v24);
+
 struct rte_vhost_device_ops const *
 vhost_driver_callback_get(const char *path)
 {
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index d322a4a888..7bcbfd12cf 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -64,6 +64,12 @@ DPDK_23 {
        local: *;
 };
 
+DPDK_24 {
+       global:
+
+       rte_vhost_driver_callback_register;
+} DPDK_23;
+
 EXPERIMENTAL {
        global:
 
@@ -98,6 +104,9 @@ EXPERIMENTAL {
        # added in 22.11
        rte_vhost_async_dma_unconfigure;
        rte_vhost_vring_call_nonblock;
+
+       # added in 23.07
+       rte_vhost_notify_guest;
 };
 
 INTERNAL {
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 8ff6434c93..79e88f986e 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -44,6 +44,10 @@ static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
        {"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
        {"size_1519_max_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[7])},
        {"guest_notifications",    offsetof(struct vhost_virtqueue, stats.guest_notifications)},
+       {"guest_notifications_offloaded", offsetof(struct vhost_virtqueue,
+               stats.guest_notifications_offloaded)},
+       {"guest_notifications_error", offsetof(struct vhost_virtqueue,
+               stats.guest_notifications_error)},
        {"iotlb_hits",             offsetof(struct vhost_virtqueue, 
stats.iotlb_hits)},
        {"iotlb_misses",           offsetof(struct vhost_virtqueue, 
stats.iotlb_misses)},
        {"inflight_submitted",     offsetof(struct vhost_virtqueue, 
stats.inflight_submitted)},
@@ -1467,6 +1471,40 @@ rte_vhost_enable_guest_notification(int vid, uint16_t 
queue_id, int enable)
        return ret;
 }
 
+void
+rte_vhost_notify_guest(int vid, uint16_t queue_id)
+{
+       struct virtio_net *dev = get_device(vid);
+       struct vhost_virtqueue *vq;
+
+       if (!dev || queue_id >= VHOST_MAX_VRING)
+               return;
+
+       vq = dev->virtqueue[queue_id];
+       if (!vq)
+               return;
+
+       rte_rwlock_read_lock(&vq->access_lock);
+
+       if (vq->callfd >= 0) {
+               int ret = eventfd_write(vq->callfd, (eventfd_t)1);
+
+               if (ret) {
+                       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+                               __atomic_fetch_add(&vq->stats.guest_notifications_error,
+                                       1, __ATOMIC_RELAXED);
+               } else {
+                       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+                               __atomic_fetch_add(&vq->stats.guest_notifications,
+                                       1, __ATOMIC_RELAXED);
+                       if (dev->notify_ops->guest_notified)
+                               dev->notify_ops->guest_notified(dev->vid);
+               }
+       }
+
+       rte_rwlock_read_unlock(&vq->access_lock);
+}
+
 void
 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
 {
diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index 37609c7c8d..0ab75b78b5 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -141,6 +141,8 @@ struct virtqueue_stats {
        uint64_t inflight_completed;
        /* Counters below are atomic, and should be incremented as such. */
        uint64_t guest_notifications;
+       uint64_t guest_notifications_offloaded;
+       uint64_t guest_notifications_error;
 };
 
 /**
@@ -884,6 +886,35 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
        return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
 }
 
+static __rte_always_inline void
+vhost_vring_inject_irq(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+       int ret;
+
+       if (dev->notify_ops->guest_notify &&
+           dev->notify_ops->guest_notify(dev->vid, vq->index)) {
+               if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+                       __atomic_fetch_add(&vq->stats.guest_notifications_offloaded,
+                               1, __ATOMIC_RELAXED);
+               return;
+       }
+
+       ret = eventfd_write(vq->callfd, (eventfd_t) 1);
+       if (ret) {
+               if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+                       __atomic_fetch_add(&vq->stats.guest_notifications_error,
+                               1, __ATOMIC_RELAXED);
+               return;
+       }
+
+       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
+               __atomic_fetch_add(&vq->stats.guest_notifications,
+                       1, __ATOMIC_RELAXED);
+       if (dev->notify_ops->guest_notified)
+               dev->notify_ops->guest_notified(dev->vid);
+}
+
+
 static __rte_always_inline void
 vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
@@ -903,26 +934,16 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
                        "%s: used_event_idx=%d, old=%d, new=%d\n",
                        __func__, vhost_used_event(vq), old, new);
 
-               if ((vhost_need_event(vhost_used_event(vq), new, old) &&
-                                       (vq->callfd >= 0)) ||
-                               unlikely(!signalled_used_valid)) {
-                       eventfd_write(vq->callfd, (eventfd_t) 1);
-                       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
-                               __atomic_fetch_add(&vq->stats.guest_notifications,
-                                       1, __ATOMIC_RELAXED);
-                       if (dev->notify_ops->guest_notified)
-                               dev->notify_ops->guest_notified(dev->vid);
+               if ((vhost_need_event(vhost_used_event(vq), new, old) ||
+                    unlikely(!signalled_used_valid)) &&
+                   vq->callfd >= 0) {
+                       vhost_vring_inject_irq(dev, vq);
                }
        } else {
                /* Kick the guest if necessary. */
                if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
                                && (vq->callfd >= 0)) {
-                       eventfd_write(vq->callfd, (eventfd_t)1);
-                       if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
-                               __atomic_fetch_add(&vq->stats.guest_notifications,
-                                       1, __ATOMIC_RELAXED);
-                       if (dev->notify_ops->guest_notified)
-                               dev->notify_ops->guest_notified(dev->vid);
+                       vhost_vring_inject_irq(dev, vq);
                }
        }
 }
@@ -974,11 +995,8 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
        if (vhost_need_event(off, new, old))
                kick = true;
 kick:
-       if (kick) {
-               eventfd_write(vq->callfd, (eventfd_t)1);
-               if (dev->notify_ops->guest_notified)
-                       dev->notify_ops->guest_notified(dev->vid);
-       }
+       if (kick && vq->callfd >= 0)
+               vhost_vring_inject_irq(dev, vq);
 }
 
 static __rte_always_inline void
@@ -1017,4 +1035,11 @@ mbuf_is_consumed(struct rte_mbuf *m)
 
 uint64_t hua_to_alignment(struct rte_vhost_memory *mem, void *ptr);
 void mem_set_dump(void *ptr, size_t size, bool enable, uint64_t alignment);
+
+/* Versioned functions */
+int rte_vhost_driver_callback_register_v23(const char *path,
+       struct rte_vhost_device_ops const * const ops);
+int rte_vhost_driver_callback_register_v24(const char *path,
+       struct rte_vhost_device_ops const * const ops);
+
 #endif /* _VHOST_NET_CDEV_H_ */
