On Wed, Mar 30, 2011 at 09:33:11PM +0200, Claudio Jeker wrote:
> On Wed, Mar 30, 2011 at 08:34:24PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 29 Mar 2011 22:42:47 +0200
> > > From: Claudio Jeker <cje...@diehard.n-r-g.com>
> > > 
> > > Here is a possible fix. The problem was that because of the way NFS uses
> > > the socket API it did not turn of the sendbuffer scaling which reset the
> > > size of the socket back to 17376 bytes which is a no go when a buffer of
> > > more then 17k is generated by NFS. It is better to initialize the sb_wat
> > > in soreserve() which is called by NFS and all attach functions.
> > 
> > This no longer does the sbcheckreserve() dance though.  Is that alright?
> > 
> 
> The code that was there previously was a bit strange. Since when
> sb_hiwat == 0 is true then sb_wat is 0 as well. Additionally
> sbcheckreserve() would only cause the watermark to be set to the default
> which is tcp_sendspace/tcp_recvspace. So since sb_hiwat == 0 is never run.
> 

While digging myself deeper into this code I figured out that we may want
something a bit more like the following diff. It adds back the
sbcheckreserve() checks in TCP and the socket buffers inherit more from
their listening socket when creating a new socket because of a SYN.
FreeBSD does something similar in their sonewconn() function. It also
seems to follow the accept(2) man page more closely:
                 The accept() call extracts the first connection request on
     the queue of pending connections, creates a new socket with the same
     properties of s, and allocates a new file descriptor for the socket.

Not sure if the SB_ASYNC sb_flags needs to be inherited as well.
-- 
:wq Claudio

Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.51
diff -u -p -r1.51 uipc_socket2.c
--- kern/uipc_socket2.c 24 Sep 2010 02:59:45 -0000      1.51
+++ kern/uipc_socket2.c 31 Mar 2011 14:27:31 -0000
@@ -176,8 +176,16 @@ sonewconn(struct socket *head, int conns
        /*
         * Inherit watermarks but those may get clamped in low mem situations.
         */
+       if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+               pool_put(&socket_pool, so);
+               return ((struct socket *)0);
+       }
        so->so_snd.sb_wat = head->so_snd.sb_wat;
+       so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+       so->so_snd.sb_timeo = head->so_snd.sb_timeo;
        so->so_rcv.sb_wat = head->so_rcv.sb_wat;
+       so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+       so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
 
        soqinsque(head, so, soqueue);
        if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL,
@@ -353,6 +361,8 @@ soreserve(struct socket *so, u_long sndc
                goto bad;
        if (sbreserve(&so->so_rcv, rcvcc))
                goto bad2;
+       so->so_snd.sb_wat = sndcc;
+       so->so_rcv.sb_wat = rcvcc;
        if (so->so_rcv.sb_lowat == 0)
                so->so_rcv.sb_lowat = 1;
        if (so->so_snd.sb_lowat == 0)
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.105
diff -u -p -r1.105 tcp_usrreq.c
--- netinet/tcp_usrreq.c        10 Oct 2010 22:02:50 -0000      1.105
+++ netinet/tcp_usrreq.c        31 Mar 2011 13:42:56 -0000
@@ -652,16 +652,10 @@ tcp_attach(so)
        struct inpcb *inp;
        int error;
 
-       if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
-               /* if low on memory only allow smaller then default buffers */
-               if (so->so_snd.sb_wat == 0 ||
-                   sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace))
-                       so->so_snd.sb_wat = tcp_sendspace;
-               if (so->so_rcv.sb_wat == 0 ||
-                   sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace))
-                       so->so_rcv.sb_wat = tcp_recvspace;
-
-               error = soreserve(so, so->so_snd.sb_wat, so->so_rcv.sb_wat);
+       if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 ||
+           sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) ||
+           sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) {
+               error = soreserve(so, tcp_sendspace, tcp_recvspace);
                if (error)
                        return (error);
        }

Reply via email to