On Sat, May 16, 2026 at 11:47:45AM +0800, Ziyu Zhang wrote:
vsock_poll() reads vsk->peer_shutdown before taking the socket
lock to set EPOLLHUP and EPOLLRDHUP, then reads it again under the
lock to report EOF readability. A shutdown packet can update
peer_shutdown while poll is waiting for the lock, so one poll invocation
can report EPOLLIN without the corresponding HUP/RDHUP bits.

Keep non-connectible sockets on a single lockless READ_ONCE()

Should this be paired with WRITE_ONCE() on writers?

snapshot. For connectible sockets, defer shutdown-derived poll bits
until after lock_sock() and use one READ_ONCE() snapshot for both EOF
readability and HUP/RDHUP. This preserves shutdowns that arrive before
the lock is acquired and keeps all peer-shutdown-derived bits consistent
for a poll pass.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Signed-off-by: Ziyu Zhang <[email protected]>
---
net/vmw_vsock/af_vsock.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index adcba1b7b..bed42347b 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1122,6 +1122,25 @@ static int vsock_shutdown(struct socket *sock, int mode)
        return err;
}

+static __poll_t vsock_poll_shutdown(struct sock *sk, u32 peer_shutdown)
+{
+       __poll_t mask = 0;
+
+       /* INET sockets treat local write shutdown and peer write shutdown as a
+        * case of EPOLLHUP set.
+        */
+       if (sk->sk_shutdown == SHUTDOWN_MASK ||
+           ((sk->sk_shutdown & SEND_SHUTDOWN) &&
+            (peer_shutdown & SEND_SHUTDOWN)))
+               mask |= EPOLLHUP;
+
+       if (sk->sk_shutdown & RCV_SHUTDOWN ||
+           peer_shutdown & SEND_SHUTDOWN)
+               mask |= EPOLLRDHUP;
+
+       return mask;
+}
+
static __poll_t vsock_poll(struct file *file, struct socket *sock,
                               poll_table *wait)
{
@@ -1139,19 +1158,9 @@ static __poll_t vsock_poll(struct file *file, struct 
socket *sock,
                /* Signify that there has been an error on this socket. */
                mask |= EPOLLERR;

-       /* INET sockets treat local write shutdown and peer write shutdown as a
-        * case of EPOLLHUP set.
-        */
-       if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
-           ((sk->sk_shutdown & SEND_SHUTDOWN) &&
-            (vsk->peer_shutdown & SEND_SHUTDOWN))) {
-               mask |= EPOLLHUP;
-       }
-
-       if (sk->sk_shutdown & RCV_SHUTDOWN ||
-           vsk->peer_shutdown & SEND_SHUTDOWN) {
-               mask |= EPOLLRDHUP;
-       }
+       if (!sock_type_connectible(sk->sk_type))
+               mask |= vsock_poll_shutdown(sk,
+                                           READ_ONCE(vsk->peer_shutdown));

Can we move this in the `if (sock->type == SOCK_DGRAM)` branch ?

Not a strong opinion about that, but in any case IMO we should add a comment here to explain why we are doing only for not connectible sockets.

That said, if we use WRITE_ONCE in the writers, do we really need to move this after the lock_sock for the connectable ones?


        if (sk_is_readable(sk))
                mask |= EPOLLIN | EPOLLRDNORM;
@@ -1171,6 +1180,7 @@ static __poll_t vsock_poll(struct file *file, struct 
socket *sock,

        } else if (sock_type_connectible(sk->sk_type)) {
                const struct vsock_transport *transport;
+               u32 peer_shutdown;

                lock_sock(sk);

@@ -1203,10 +1213,12 @@ static __poll_t vsock_poll(struct file *file, struct 
socket *sock,
                 * terminated should also be considered read, and we check the
                 * shutdown flag for that.
                 */
+               peer_shutdown = READ_ONCE(vsk->peer_shutdown);
                if (sk->sk_shutdown & RCV_SHUTDOWN ||
-                   vsk->peer_shutdown & SEND_SHUTDOWN) {
+                   peer_shutdown & SEND_SHUTDOWN) {
                        mask |= EPOLLIN | EPOLLRDNORM;
                }
+               mask |= vsock_poll_shutdown(sk, peer_shutdown);

nit: to keep the order the same as before, I would move this call just before this `if` block, but I don't think it makes any difference in the end.


                /* Connected sockets that can produce data can be written. */
                if (transport && sk->sk_state == TCP_ESTABLISHED) {
--
2.43.0



Reply via email to