Add tun_wake_queue() to tun.c and export it for use by vhost-net. The
function validates that the file belongs to a tun/tap device,
dereferences the tun_struct under RCU, and delegates to
__tun_wake_queue().

vhost_net_buf_produce() now calls tun_wake_queue() after a successful
batched consume of the ring so that the netdev subqueue can be woken up.
The point is to allow the queue to be stopped when it gets full, which
is required for traffic shaping and is implemented by the following
patch, "avoid ptr_ring tail-drop when a qdisc is present".

Without the corresponding queue stopping, this patch alone causes no
throughput regression for a tap+vhost-net setup sending to a qemu VM:
throughput goes from 3.857 Mpps to 3.891 Mpps.

Details: AMD Ryzen 5 5600X at 4.3 GHz, 3200 MHz RAM, isolated QEMU
threads, XDP drop program active in VM, pktgen sender; averaged over
50 runs of 100,000,000 packets each. SRSO and spectre v2 mitigations
disabled.

Co-developed-by: Tim Gebauer <[email protected]>
Signed-off-by: Tim Gebauer <[email protected]>
Signed-off-by: Simon Schippers <[email protected]>
---
 drivers/net/tun.c      | 23 +++++++++++++++++++++++
 drivers/vhost/net.c    | 21 +++++++++++++++------
 include/linux/if_tun.h |  3 +++
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 00ecf128fe8e..fc358c4c355b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3776,6 +3776,29 @@ struct ptr_ring *tun_get_tx_ring(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_tx_ring);
 
+/* Callers must hold ring.consumer_lock */
+void tun_wake_queue(struct file *file, int consumed)
+{
+       struct tun_file *tfile;
+       struct tun_struct *tun;
+
+       if (file->f_op != &tun_fops)
+               return;
+
+       tfile = file->private_data;
+       if (!tfile)
+               return;
+
+       rcu_read_lock();
+
+       tun = rcu_dereference(tfile->tun);
+       if (tun)
+               __tun_wake_queue(tun, tfile, consumed);
+
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tun_wake_queue);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 80965181920c..ee583d6cc0fa 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -176,13 +176,21 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
        return ret;
 }
 
-static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_produce(struct sock *sk,
+                                struct vhost_net_virtqueue *nvq)
 {
+       struct file *file = sk->sk_socket->file;
        struct vhost_net_buf *rxq = &nvq->rxq;
 
        rxq->head = 0;
-       rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
-                                             VHOST_NET_BATCH);
+       spin_lock(&nvq->rx_ring->consumer_lock);
+       rxq->tail = __ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
+                                              VHOST_NET_BATCH);
+
+       if (rxq->tail)
+               tun_wake_queue(file, rxq->tail);
+
+       spin_unlock(&nvq->rx_ring->consumer_lock);
        return rxq->tail;
 }
 
@@ -209,14 +217,15 @@ static int vhost_net_buf_peek_len(void *ptr)
        return __skb_array_len_with_tag(ptr);
 }
 
-static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_peek(struct sock *sk,
+                             struct vhost_net_virtqueue *nvq)
 {
        struct vhost_net_buf *rxq = &nvq->rxq;
 
        if (!vhost_net_buf_is_empty(rxq))
                goto out;
 
-       if (!vhost_net_buf_produce(nvq))
+       if (!vhost_net_buf_produce(sk, nvq))
                return 0;
 
 out:
@@ -995,7 +1004,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
        unsigned long flags;
 
        if (rvq->rx_ring)
-               return vhost_net_buf_peek(rvq);
+               return vhost_net_buf_peek(sk, rvq);
 
        spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
        head = skb_peek(&sk->sk_receive_queue);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 80166eb62f41..5f3e206c7a73 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,6 +22,7 @@ struct tun_msg_ctl {
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+void tun_wake_queue(struct file *file, int consumed);
 
 static inline bool tun_is_xdp_frame(void *ptr)
 {
@@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
        return ERR_PTR(-EINVAL);
 }
 
+static inline void tun_wake_queue(struct file *f, int consumed) {}
+
 static inline bool tun_is_xdp_frame(void *ptr)
 {
        return false;
-- 
2.43.0


Reply via email to