The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=f2c2ed7df313f641451ca5a468f658fd350aae52

commit f2c2ed7df313f641451ca5a468f658fd350aae52
Author:     Gleb Smirnoff <gleb...@freebsd.org>
AuthorDate: 2025-07-25 20:05:56 +0000
Commit:     Gleb Smirnoff <gleb...@freebsd.org>
CommitDate: 2025-07-25 20:05:56 +0000

    sendfile: don't hack sb_lowat for sockets that manage the watermark
    
    In sendfile(2) we carry an old hack (originating from d99b0dd2c5297)
    to help dumb benchmarks and applications achieve higher performance. We
    would modify the low watermark on the socket send buffer to avoid the
    socket being reported as writable too early, which would result in lots
    of small writes.
    
    Skip that hack for applications that do setsockopt(SO_SNDLOWAT) or that
    register the socket in kevent(2) with the NOTE_LOWAT feature.  First, we
    don't want the hack to rewrite the watermark value explicitly specified
    by the user.  Second, in certain cases the hack can lead to real
    performance regressions: a kevent(2) with NOTE_LOWAT would report the
    socket as writable, but then sendfile(2) would write 0 bytes and return
    EAGAIN.
    
    The change also disables the hack for unix(4) sockets, leaving it
    enabled only for TCP.
    
    Reviewed by:            rrs
    Differential Revision:  https://reviews.freebsd.org/D50581
---
 sys/kern/kern_sendfile.c | 11 +++++++----
 sys/kern/uipc_sockbuf.c  |  1 +
 sys/kern/uipc_socket.c   |  6 +++++-
 sys/netinet/tcp_usrreq.c |  2 +-
 sys/sys/sockbuf.h        |  2 +-
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 35b258e68701..8438298afc0e 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -698,10 +698,13 @@ sendfile_wait_generic(struct socket *so, off_t need, int *space)
         */
        error = 0;
        SOCK_SENDBUF_LOCK(so);
-       if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
-               so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
-       if (so->so_snd.sb_lowat < PAGE_SIZE && so->so_snd.sb_hiwat >= PAGE_SIZE)
-               so->so_snd.sb_lowat = PAGE_SIZE;
+       if (so->so_snd.sb_flags & SB_AUTOLOWAT) {
+               if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
+                       so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
+               if (so->so_snd.sb_lowat < PAGE_SIZE &&
+                   so->so_snd.sb_hiwat >= PAGE_SIZE)
+                       so->so_snd.sb_lowat = PAGE_SIZE;
+       }
 retry_space:
        if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
                error = EPIPE;
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index ec00878cd9a5..ffaa9b800592 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -779,6 +779,7 @@ sbsetopt(struct socket *so, struct sockopt *sopt)
                 * high-water.
                 */
                *lowat = (cc > *hiwat) ? *hiwat : cc;
+               *flags &= ~SB_AUTOLOWAT;
                break;
        }
 
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 6c9eb7139cd1..4e8c179acee9 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1211,7 +1211,8 @@ solisten_clone(struct socket *head)
        so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
        so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
        so->so_rcv.sb_flags = head->sol_sbrcv_flags & SB_AUTOSIZE;
-       so->so_snd.sb_flags = head->sol_sbsnd_flags & SB_AUTOSIZE;
+       so->so_snd.sb_flags = head->sol_sbsnd_flags &
+           (SB_AUTOSIZE | SB_AUTOLOWAT);
        if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) {
                so->so_snd.sb_mtx = &so->so_snd_mtx;
                so->so_rcv.sb_mtx = &so->so_rcv_mtx;
@@ -4514,6 +4515,9 @@ sokqfilter_generic(struct socket *so, struct knote *kn)
                SOCK_BUF_LOCK(so, which);
                knlist_add(knl, kn, 1);
                sb->sb_flags |= SB_KNOTE;
+               if ((kn->kn_sfflags & NOTE_LOWAT) &&
+                   (sb->sb_flags & SB_AUTOLOWAT))
+                       sb->sb_flags &= ~SB_AUTOLOWAT;
                SOCK_BUF_UNLOCK(so, which);
        }
        SOCK_UNLOCK(so);
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 70e4c04b79e5..98c934955121 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -164,7 +164,7 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
                goto out;
 
        so->so_rcv.sb_flags |= SB_AUTOSIZE;
-       so->so_snd.sb_flags |= SB_AUTOSIZE;
+       so->so_snd.sb_flags |= (SB_AUTOLOWAT | SB_AUTOSIZE);
        error = in_pcballoc(so, &V_tcbinfo);
        if (error)
                goto out;
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 7f6234ade6f4..2fed44bc9825 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -40,7 +40,7 @@
 #define        SB_SEL          0x08            /* someone is selecting */
 #define        SB_ASYNC        0x10            /* ASYNC I/O, need signals */
 #define        SB_UPCALL       0x20            /* someone wants an upcall */
-/* was SB_NOINTR       0x40            */
+#define        SB_AUTOLOWAT    0x40            /* sendfile(2) may autotune sb_lowat */
 #define        SB_AIO          0x80            /* AIO operations queued */
 #define        SB_KNOTE        0x100           /* kernel note attached */
 #define        SB_NOCOALESCE   0x200           /* don't coalesce new data into existing mbufs */

Reply via email to