The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=08e638c089ab57531f08994d03c9dde54c4744f9

commit 08e638c089ab57531f08994d03c9dde54c4744f9
Author:     Mark Johnston <ma...@freebsd.org>
AuthorDate: 2025-02-06 14:15:41 +0000
Commit:     Mark Johnston <ma...@freebsd.org>
CommitDate: 2025-02-06 14:15:41 +0000

    udp: Add a sysctl to modify listening socket FIB inheritance
    
    Introduce the net.inet.udp.bind_all_fibs tunable, set to 1 by default
    for compatibility with current behaviour.  When set to 0, all received
    datagrams will be dropped unless an inpcb bound to the same FIB exists.
    
    No functional change intended, as the new behaviour is not enabled by
    default.
    
    Reviewed by:    glebius
    MFC after:      2 weeks
    Sponsored by:   Klara, Inc.
    Sponsored by:   Stormshield
    Differential Revision:  https://reviews.freebsd.org/D48664
---
 share/man/man4/udp.4       | 15 ++++++++++++++-
 sys/netinet/udp_usrreq.c   | 40 ++++++++++++++++++++++++++++++----------
 sys/netinet/udp_var.h      |  6 ++++--
 sys/netinet6/udp6_usrreq.c | 18 ++++++++++++------
 4 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/share/man/man4/udp.4 b/share/man/man4/udp.4
index 178adfe06b9f..b1dbff56154f 100644
--- a/share/man/man4/udp.4
+++ b/share/man/man4/udp.4
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd August 1, 2022
+.Dd January 20, 2025
 .Dt UDP 4
 .Os
 .Sh NAME
@@ -107,6 +107,19 @@ Only one value is supported for this option:
 .Tn UDP_ENCAP_ESPINUDP
 from RFC 3948, defined in
 .In netinet/udp.h .
+.Sh FIB support
+UDP sockets are FIB-aware.
+They inherit the FIB of the process which created the socket.
+By default, a UDP socket bound to an address can receive datagrams originating
+from any FIB.
+If the
+.Va net.inet.udp.bind_all_fibs
+tunable is set to 0, all UDP sockets will receive only datagrams originating
+from the same FIB as the socket.
+In this mode, multiple sockets can be bound to the same address, so long as
+each socket belongs to a different FIB, similar to the behavior of the
+.Dv SO_REUSEPORT
+option.
 .Sh MIB (sysctl) Variables
 The
 .Nm
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 50738d5549f3..8278efcae60c 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -105,6 +105,11 @@
  * Per RFC 3828, July, 2004.
  */
 
+VNET_DEFINE(int, udp_bind_all_fibs) = 1;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, bind_all_fibs, CTLFLAG_VNET | CTLFLAG_RDTUN,
+    &VNET_NAME(udp_bind_all_fibs), 0,
+    "Bound sockets receive traffic from all FIBs");
+
 /*
  * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
  * removes the only data integrity mechanism for packets and malformed
@@ -359,10 +364,12 @@ udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in)
 #endif
        struct inpcb *inp;
        struct mbuf *n;
-       int appends = 0;
+       int appends = 0, fib;
 
        MPASS(ip->ip_hl == sizeof(struct ip) >> 2);
 
+       fib = M_GETFIB(m);
+
        while ((inp = inp_next(&inpi)) != NULL) {
                /*
                 * XXXRW: Because we weren't holding either the inpcb
@@ -370,6 +377,14 @@ udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in)
                 * before, we should probably recheck now that the
                 * inpcb lock is held.
                 */
+
+               if (V_udp_bind_all_fibs == 0 && fib != inp->inp_inc.inc_fibnum)
+                       /*
+                        * Sockets bound to a specific FIB can only receive
+                        * packets from that FIB.
+                        */
+                       continue;
+
                /*
                 * Handle socket delivery policy for any-source
                 * and source-specific multicast. [RFC3678]
@@ -453,7 +468,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
        struct sockaddr_in udp_in[2];
        struct mbuf *m;
        struct m_tag *fwd_tag;
-       int cscov_partial, iphlen;
+       int cscov_partial, iphlen, lookupflags;
 
        m = *mp;
        iphlen = *offp;
@@ -575,7 +590,11 @@ udp_input(struct mbuf **mp, int *offp, int proto)
 
        /*
         * Locate pcb for datagram.
-        *
+        */
+       lookupflags = INPLOOKUP_RLOCKPCB |
+           (V_udp_bind_all_fibs ? 0 : INPLOOKUP_FIB);
+
+       /*
         * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
         */
        if ((m->m_flags & M_IP_NEXTHOP) &&
@@ -589,7 +608,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
                 * Already got one like this?
                 */
                inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
-                   ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
+                   ip->ip_dst, uh->uh_dport, lookupflags, ifp, m);
                if (!inp) {
                        /*
                         * It's new.  Try to find the ambushing socket.
@@ -599,8 +618,8 @@ udp_input(struct mbuf **mp, int *offp, int proto)
                        inp = in_pcblookup(pcbinfo, ip->ip_src,
                            uh->uh_sport, next_hop->sin_addr,
                            next_hop->sin_port ? htons(next_hop->sin_port) :
-                           uh->uh_dport, INPLOOKUP_WILDCARD |
-                           INPLOOKUP_RLOCKPCB, ifp);
+                           uh->uh_dport, INPLOOKUP_WILDCARD | lookupflags,
+                           ifp);
                }
                /* Remove the tag from the packet. We don't need it anymore. */
                m_tag_delete(m, fwd_tag);
@@ -608,7 +627,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
        } else
                inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
                    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
-                   INPLOOKUP_RLOCKPCB, ifp, m);
+                   lookupflags, ifp, m);
        if (inp == NULL) {
                if (V_udp_log_in_vain) {
                        char src[INET_ADDRSTRLEN];
@@ -1242,8 +1261,8 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
                        inp->inp_vflag &= ~INP_IPV6;
                }
                INP_HASH_WLOCK(pcbinfo);
-               error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport, 0,
-                   td->td_ucred);
+               error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport,
+                   V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
                INP_HASH_WUNLOCK(pcbinfo);
                if ((flags & PRUS_IPV6) != 0)
                        inp->inp_vflag = vflagsav;
@@ -1592,7 +1611,8 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 
        INP_WLOCK(inp);
        INP_HASH_WLOCK(pcbinfo);
-       error = in_pcbbind(inp, sinp, 0, td->td_ucred);
+       error = in_pcbbind(inp, sinp, V_udp_bind_all_fibs ? 0 : INPBIND_FIB,
+           td->td_ucred);
        INP_HASH_WUNLOCK(pcbinfo);
        INP_WUNLOCK(inp);
        return (error);
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index 2528e4fcb30f..3895f365db3c 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -155,13 +155,15 @@ VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
 
 extern u_long                  udp_sendspace;
 extern u_long                  udp_recvspace;
-VNET_DECLARE(int, udp_cksum);
+VNET_DECLARE(int, udp_bind_all_fibs);
 VNET_DECLARE(int, udp_blackhole);
 VNET_DECLARE(bool, udp_blackhole_local);
+VNET_DECLARE(int, udp_cksum);
 VNET_DECLARE(int, udp_log_in_vain);
-#define        V_udp_cksum             VNET(udp_cksum)
+#define        V_udp_bind_all_fibs     VNET(udp_bind_all_fibs)
 #define        V_udp_blackhole         VNET(udp_blackhole)
 #define        V_udp_blackhole_local   VNET(udp_blackhole_local)
+#define        V_udp_cksum             VNET(udp_cksum)
 #define        V_udp_log_in_vain       VNET(udp_log_in_vain)
 
 VNET_DECLARE(int, zero_checksum_port);
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index 07875efcb144..40216ad4c420 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -357,6 +357,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
        int off = *offp;
        int cscov_partial;
        int plen, ulen;
+       int lookupflags;
        struct sockaddr_in6 fromsa[2];
        struct m_tag *fwd_tag;
        uint16_t uh_sum;
@@ -454,6 +455,8 @@ skip_checksum:
        /*
         * Locate pcb for datagram.
         */
+       lookupflags = INPLOOKUP_RLOCKPCB |
+           (V_udp_bind_all_fibs ? 0 : INPLOOKUP_FIB);
 
        /*
         * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
@@ -470,7 +473,7 @@ skip_checksum:
                 */
                inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
                    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
-                   INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
+                   lookupflags, m->m_pkthdr.rcvif, m);
                if (!inp) {
                        /*
                         * It's new.  Try to find the ambushing socket.
@@ -480,8 +483,8 @@ skip_checksum:
                        inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
                            uh->uh_sport, &next_hop6->sin6_addr,
                            next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
-                           uh->uh_dport, INPLOOKUP_WILDCARD |
-                           INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
+                           uh->uh_dport, INPLOOKUP_WILDCARD | lookupflags,
+                           m->m_pkthdr.rcvif);
                }
                /* Remove the tag from the packet. We don't need it anymore. */
                m_tag_delete(m, fwd_tag);
@@ -489,7 +492,7 @@ skip_checksum:
        } else
                inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
                    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
-                   INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
+                   INPLOOKUP_WILDCARD | lookupflags,
                    m->m_pkthdr.rcvif, m);
        if (inp == NULL) {
                if (V_udp_log_in_vain) {
@@ -1058,13 +1061,16 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
                        in6_sin6_2_sin(&sin, sin6_p);
                        inp->inp_vflag |= INP_IPV4;
                        inp->inp_vflag &= ~INP_IPV6;
-                       error = in_pcbbind(inp, &sin, 0, td->td_ucred);
+                       error = in_pcbbind(inp, &sin,
+                           V_udp_bind_all_fibs ? 0 : INPBIND_FIB,
+                           td->td_ucred);
                        goto out;
                }
 #endif
        }
 
-       error = in6_pcbbind(inp, sin6_p, 0, td->td_ucred);
+       error = in6_pcbbind(inp, sin6_p, V_udp_bind_all_fibs ? 0 : INPBIND_FIB,
+           td->td_ucred);
 #ifdef INET
 out:
 #endif

Reply via email to