The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=a4fc41423f7d6e43287822212f0e9db7aab83d39

commit a4fc41423f7d6e43287822212f0e9db7aab83d39
Author:     Gleb Smirnoff <[email protected]>
AuthorDate: 2022-06-24 16:09:10 +0000
Commit:     Gleb Smirnoff <[email protected]>
CommitDate: 2022-06-24 16:09:10 +0000

    sockets: enable protocol specific socket buffers
    
    Split struct sockbuf into common shared fields and protocol specific
    union, where protocols are free to implement whatever buffer they
    want.  Such protocols should mark themselves with PR_SOCKBUF and are
    expected to initialize their buffers in their pr_attach and tear
    them down in pr_detach.
    
    Reviewed by:            markj
    Differential revision:  https://reviews.freebsd.org/D35299
---
 sys/kern/uipc_socket.c | 12 +++++--
 sys/sys/protosw.h      |  3 ++
 sys/sys/sockbuf.h      | 86 ++++++++++++++++++++++++++++++++------------------
 3 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index a2241464e35b..ffaf5acdd05d 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -418,8 +418,6 @@ soalloc(struct vnet *vnet)
         * a feature to change class of an existing lock, so we use DUPOK.
         */
        mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
-       so->so_snd.sb_mtx = &so->so_snd_mtx;
-       so->so_rcv.sb_mtx = &so->so_rcv_mtx;
        mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF);
        mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF);
        so->so_rcv.sb_sel = &so->so_rdsel;
@@ -557,6 +555,10 @@ socreate(int dom, struct socket **aso, int type, int proto,
            so_rdknl_assert_lock);
        knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
            so_wrknl_assert_lock);
+       if ((prp->pr_flags & PR_SOCKBUF) == 0) {
+               so->so_snd.sb_mtx = &so->so_snd_mtx;
+               so->so_rcv.sb_mtx = &so->so_rcv_mtx;
+       }
        /*
         * Auto-sizing of socket buffers is managed by the protocols and
         * the appropriate flags must be set in the pru_attach function.
@@ -756,6 +758,10 @@ sonewconn(struct socket *head, int connstatus)
                    __func__, head->so_pcb);
                return (NULL);
        }
+       if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) {
+               so->so_snd.sb_mtx = &so->so_snd_mtx;
+               so->so_rcv.sb_mtx = &so->so_rcv_mtx;
+       }
        if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
                sodealloc(so);
                log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
@@ -1207,7 +1213,7 @@ sofree(struct socket *so)
         * socket exist anywhere else in the stack.  Therefore, no locks need
         * to be acquired or held.
         */
-       if (!SOLISTENING(so)) {
+       if (!(pr->pr_flags & PR_SOCKBUF) && !SOLISTENING(so)) {
                sbdestroy(so, SO_SND);
                sbdestroy(so, SO_RCV);
        }
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
index dc550d42f1fd..26cd1bc3fc16 100644
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -114,6 +114,8 @@ struct protosw {
  *     and the protocol understands the MSG_EOF flag.  The first property is
  *     is only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed
  *     anyhow).
+ * PR_SOCKBUF requires the protocol to initialize its socket buffers in its
+ * pr_attach and to destroy them in its pr_detach.
  */
 #define        PR_ATOMIC       0x01            /* exchange atomic messages only */
 #define        PR_ADDR         0x02            /* addresses given with messages */
@@ -123,6 +125,7 @@ struct protosw {
 #define PR_IMPLOPCL    0x20            /* implied open/close */
 #define        PR_LASTHDR      0x40            /* enforce ipsec policy; last header */
 #define        PR_CAPATTACH    0x80            /* socket can attach in cap mode */
+#define        PR_SOCKBUF      0x100           /* private implementation of buffers */
 
 /*
  * In earlier BSD network stacks, a single pr_usrreq() function pointer was
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 31c351860a94..7800b2790c04 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -75,41 +75,65 @@ struct thread;
 struct selinfo;
 
 /*
- * Variables for socket buffering.
+ * Socket buffer
  *
- * Locking key to struct sockbuf:
- * (a) locked by SOCKBUF_LOCK().
+ * A buffer starts with the fields that are accessed by I/O multiplexing
+ * APIs like select(2), kevent(2) or AIO and thus are shared between different
+ * buffer implementations.  They are protected by the SOCK_RECVBUF_LOCK()
+ * or SOCK_SENDBUF_LOCK() of the owning socket.
+ *
+ * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
+ * methods.
+ *
+ * Protocol specific implementations follow in a union.
  */
 struct sockbuf {
-       struct  mtx *sb_mtx;            /* sockbuf lock */
        struct  selinfo *sb_sel;        /* process selecting read/write */
-       short   sb_state;       /* (a) socket state on sockbuf */
-       short   sb_flags;       /* (a) flags, see above */
-       struct  mbuf *sb_mb;    /* (a) the mbuf chain */
-       struct  mbuf *sb_mbtail; /* (a) the last mbuf in the chain */
-       struct  mbuf *sb_lastrecord;    /* (a) first mbuf of last
-                                        * record in socket buffer */
-       struct  mbuf *sb_sndptr; /* (a) pointer into mbuf chain */
-       struct  mbuf *sb_fnrdy; /* (a) pointer to first not ready buffer */
-       u_int   sb_sndptroff;   /* (a) byte offset of ptr into chain */
-       u_int   sb_acc;         /* (a) available chars in buffer */
-       u_int   sb_ccc;         /* (a) claimed chars in buffer */
-       u_int   sb_hiwat;       /* (a) max actual char count */
-       u_int   sb_mbcnt;       /* (a) chars of mbufs used */
-       u_int   sb_mbmax;       /* (a) max chars of mbufs to use */
-       u_int   sb_ctl;         /* (a) non-data chars in buffer */
-       u_int   sb_tlscc;       /* (a) TLS chain characters */
-       u_int   sb_tlsdcc;      /* (a) TLS characters being decrypted */
-       int     sb_lowat;       /* (a) low water mark */
-       sbintime_t      sb_timeo;       /* (a) timeout for read/write */
-       struct  mbuf *sb_mtls;  /* (a) TLS mbuf chain */
-       struct  mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */
-       int     (*sb_upcall)(struct socket *, void *, int); /* (a) */
-       void    *sb_upcallarg;  /* (a) */
-       uint64_t sb_tls_seqno;  /* (a) TLS seqno */
-       struct  ktls_session *sb_tls_info; /* (a + b) TLS state */
-       TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */
-       struct  task sb_aiotask; /* AIO task */
+       short   sb_state;               /* socket state on sockbuf */
+       short   sb_flags;               /* flags, see above */
+       u_int   sb_acc;                 /* available chars in buffer */
+       u_int   sb_ccc;                 /* claimed chars in buffer */
+       u_int   sb_mbcnt;               /* chars of mbufs used */
+       u_int   sb_ctl;                 /* non-data chars in buffer */
+       u_int   sb_hiwat;               /* max actual char count */
+       u_int   sb_lowat;               /* low water mark */
+       u_int   sb_mbmax;               /* max chars of mbufs to use */
+       sbintime_t sb_timeo;            /* timeout for read/write */
+       int     (*sb_upcall)(struct socket *, void *, int);
+       void    *sb_upcallarg;
+       TAILQ_HEAD(, kaiocb) sb_aiojobq;        /* pending AIO ops */
+       struct  task sb_aiotask;                /* AIO task */
+       union {
+               /*
+                * Classic BSD one-size-fits-all socket buffer, capable of
+                * doing streams and datagrams. The stream part is able
+                * to perform special features:
+                * - not ready data (sendfile)
+                * - TLS
+                */
+               struct {
+                       /* compat: sockbuf lock pointer */
+                       struct  mtx *sb_mtx;
+                       /* first and last mbufs in the chain */
+                       struct  mbuf *sb_mb;
+                       struct  mbuf *sb_mbtail;
+                       /* first mbuf of last record in socket buffer */
+                       struct  mbuf *sb_lastrecord;
+                       /* pointer to data to send next (TCP) */
+                       struct  mbuf *sb_sndptr;
+                       /* pointer to first not ready buffer */
+                       struct  mbuf *sb_fnrdy;
+                       /* byte offset of ptr into chain, used with sb_sndptr */
+                       u_int   sb_sndptroff;
+                       /* TLS */
+                       u_int   sb_tlscc;       /* TLS chain characters */
+                       u_int   sb_tlsdcc;      /* characters being decrypted */
+                       struct  mbuf *sb_mtls;  /*  TLS mbuf chain */
+                       struct  mbuf *sb_mtlstail; /* last mbuf in TLS chain */
+                       uint64_t sb_tls_seqno;  /* TLS seqno */
+                       struct  ktls_session *sb_tls_info; /* TLS state */
+               };
+       };
 };
 
 #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */

Reply via email to