this is the second fibnum patch, which replaces (doesn't add to) the first.
some blanks/tabs/margins lint was incidentally fixed, a few comments were
added, the API of several existing functions was changed, and some wrappers
were added to others. as explained inline below, this handles both TCP and UDP
listeners now. i did not add an SO_FIB operator nor shim SO_SETFIB since those
are independent of this socket-related work, needed to get path symmetry for
shell-related listeners like sshd.
comments, questions, and especially testing results would be very welcome.
vixie
---
On Monday, January 6, 2025 3:56:55 PM UTC Mark Johnston wrote:
> On Fri, Dec 27, 2024 at 08:48:48AM +0000, Paul Vixie wrote:
> > On Tuesday, December 24, 2024 3:34:45 AM UTC Santiago Martinez wrote:
> > > here’s another user of fibs. Each of our servers have multiple fibs and
> > > jails with fibs. I like the proposed.
> >
> > On Tuesday, December 24, 2024 5:06:32 AM UTC Jamie Landeg-Jones wrote:
> > > I like that. I isolate 5 seperate networks by assigning a fib to each
> > > interface, and was initially surprised that I had to jump through ipfw
> > > hoops to get it to work properly, in fact at the end of my ipfw rules
> > > for these interfaces, just to guarantee no leaking, ...
> > >
> > > So, yes, I agree that it's crocky, and your proposal is how I originally
> > > expected it to work, and indeed, I can so no reason for it not to work
> > > that way, but am prepared to be enlightened if anyone else has an
> > > opinion on this.
> >
> > Groovy. See attached patch. This is just for TCP since I have no way to
> > test SCTP and I think UDP will have to be handled at the application
> > layer. There are two one line changes here.
i was wrong, UDP doesn't need application layer changes. this patch is longer
than the first one, since it (1) does TCP in the way mark johnston suggested,
(2) also handles UDP, and (3) is tested against 14.2-P1 now although that
didn't change the patch. for what it's worth SCTP looks simple but i really
think it should be patched later by someone who can test it.
(On Monday, January 6, 2025 3:56:55 PM UTC Mark Johnston wrote:)
> One side effect of the patch is that a service listening in FIB 0 that
> has no route to the source address of a connection attempt from a
> different FIB would previously not accept such a connection, but now it
> will. Perhaps that's drastic enough to warrant a sysctl and/or sockopt
> to control this behaviour.
i hope not. moving from unintentional configuration-dependent failure to
unintentional configuration-dependent success is not a change that freebsd
usually optionalizes.
> It would be better to pass the fibnum to solisten_clone() and assign it
> there. Otherwise, the value of so_fibnum will be wrong for a short
> window during which the socket is passed to MAC and other hooks, which
> might have some surprising effects.
done.
--
Paul Vixie
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 84d817c9ada1..5021e135e752 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -988,13 +988,13 @@ SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW,
* accept(2), the protocol has two options:
* 1) Call legacy sonewconn() function, which would call protocol attach
* method, same as used for socket(2).
- * 2) Call solisten_clone(), do attach that is specific to a cloned connection,
- * and then call solisten_enqueue().
+ * 2) Call solisten_clone(), do an attach that is specific to a cloned
+ * connection, and then call solisten_enqueue().
*
* Note: the ref count on the socket is 0 on return.
*/
struct socket *
-solisten_clone(struct socket *head)
+solisten_clone(struct socket *head, int fibnum)
{
struct sbuf descrsb;
struct socket *so;
@@ -1161,7 +1161,8 @@ solisten_clone(struct socket *head)
SO_LINGER | SO_OOBINLINE | SO_NOSIGPIPE);
so->so_linger = head->so_linger;
so->so_state = head->so_state;
- so->so_fibnum = head->so_fibnum;
+ /* note, C has short-circuit evaluation. */
+ so->so_fibnum = head->so_fibnum || fibnum;
so->so_proto = head->so_proto;
so->so_cred = crhold(head->so_cred);
#ifdef MAC
@@ -1198,7 +1199,7 @@ sonewconn(struct socket *head, int connstatus)
{
struct socket *so;
- if ((so = solisten_clone(head)) == NULL)
+ if ((so = solisten_clone(head, head->so_fibnum)) == NULL)
return (NULL);
if (so->so_proto->pr_attach(so, 0, NULL) != 0) {
diff --git a/sys/net/if.c b/sys/net/if.c
index edc7d8376bbf..da2041c7c980 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1812,11 +1812,11 @@ ifa_free(struct ifaddr *ifa)
((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))
/*
- * Locate an interface based on a complete address.
+ * Locate an interface based on a complete address,
+ * and optionally retrieve its FIB number.
*/
-/*ARGSUSED*/
struct ifaddr *
-ifa_ifwithaddr(const struct sockaddr *addr)
+ifa_ifwithaddr_getfib(const struct sockaddr *addr, uint16_t *fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1841,17 +1841,23 @@ ifa_ifwithaddr(const struct sockaddr *addr)
}
ifa = NULL;
done:
+ if (ifp != NULL && fibnum != NULL)
+ *fibnum = ifp->if_fib;
return (ifa);
}
+/*
+ * Test for existence of an interface having this complete address,
+ * optionally retrieving its FIB number.
+ */
int
-ifa_ifwithaddr_check(const struct sockaddr *addr)
+ifa_ifwithaddr_check_getfib(const struct sockaddr *addr, uint16_t *fibnum)
{
struct epoch_tracker et;
int rc;
NET_EPOCH_ENTER(et);
- rc = (ifa_ifwithaddr(addr) != NULL);
+ rc = (ifa_ifwithaddr_getfib(addr, fibnum) != NULL);
NET_EPOCH_EXIT(et);
return (rc);
}
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 74692e916558..4ebbb6982c10 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -528,13 +528,20 @@ int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);
-struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
-int ifa_ifwithaddr_check(const struct sockaddr *);
+struct ifaddr *ifa_ifwithaddr_getfib(const struct sockaddr *, uint16_t *);
+int ifa_ifwithaddr_check_getfib(const struct sockaddr *,
+ uint16_t *);
+static inline struct ifaddr *ifa_ifwithaddr(const struct sockaddr *sa) {
+ return (ifa_ifwithaddr_getfib(sa, NULL));
+}
+static inline int ifa_ifwithaddr_check(const struct sockaddr *sa) {
+ return (ifa_ifwithaddr_check_getfib(sa, NULL));
+}
struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *,
- const struct sockaddr *, u_int);
+ const struct sockaddr *, u_int);
struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t);
int ifa_preferred(struct ifaddr *, struct ifaddr *);
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index f6904690deab..f2591b387d50 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -691,7 +691,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred)
return (EINVAL);
anonport = sin == NULL || sin->sin_port == 0;
error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr,
- &inp->inp_lport, cred);
+ &inp->inp_lport, &inp->inp_inc.inc_fibnum, cred);
if (error)
return (error);
if (in_pcbinshash(inp) != 0) {
@@ -870,11 +870,11 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
* calling in_pcbinshash(), or they can just use the resulting
* port and address to authorise the sending of a once-off packet.
*
- * On error, the values of *laddrp and *lportp are not changed.
+ * On error, the values of *laddrp, *lportp and *fibnum are not changed.
*/
int
in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
- u_short *lportp, struct ucred *cred)
+ u_short *lportp, uint16_t *fibnum, struct ucred *cred)
{
struct socket *so = inp->inp_socket;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
@@ -945,8 +945,10 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
* to any endpoint address, local or not.
*/
if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
+ ifa_ifwithaddr_check_getfib((struct sockaddr *)sin,
+ fibnum) == 0) {
return (EADDRNOTAVAIL);
+ }
}
laddr = sin->sin_addr;
if (lport) {
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 4844bbee3b54..fef196020a03 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -667,7 +667,7 @@ void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr_in *, struct ucred *);
int in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
- u_short *, struct ucred *);
+ u_short *, uint16_t *, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *,
bool);
int in_pcbconnect_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 83f85a50ed40..6264948201cf 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -597,7 +597,7 @@ int
tcp6_input(struct mbuf **mp, int *offp, int proto)
{
- return(tcp6_input_with_port(mp, offp, proto, 0));
+ return (tcp6_input_with_port(mp, offp, proto, 0));
}
#endif /* INET6 */
@@ -1057,7 +1057,9 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
}
inc.inc_fport = th->th_sport;
inc.inc_lport = th->th_dport;
- inc.inc_fibnum = so->so_fibnum;
+
+ /* note, C has short-circuit evaluation. */
+ inc.inc_fibnum = so->so_fibnum || m->m_pkthdr.fibnum;
/*
* Check for an existing connection attempt in syncache if
@@ -1498,7 +1500,7 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
int
tcp_input(struct mbuf **mp, int *offp, int proto)
{
- return(tcp_input_with_port(mp, offp, proto, 0));
+ return (tcp_input_with_port(mp, offp, proto, 0));
}
static void
@@ -4141,7 +4143,7 @@ tcp_compute_pipe(struct tcpcb *tp)
tp->sackhint.sack_bytes_rexmit -
tp->sackhint.sacked_bytes);
} else {
- return((*tp->t_fb->tfb_compute_pipe)(tp));
+ return ((*tp->t_fb->tfb_compute_pipe)(tp));
}
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 15244a61d8da..0601871fda08 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -803,7 +803,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
* as they would have been set up if we had created the
* connection when the SYN arrived.
*/
- if ((so = solisten_clone(lso)) == NULL)
+ if ((so = solisten_clone(lso, sc->sc_inc.inc_fibnum)) == NULL)
goto allocfail;
#ifdef MAC
mac_socketpeer_set_from_mbuf(m, so);
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 7329600ecc79..1e73e6315123 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -1218,7 +1218,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
inp->inp_vflag &= ~INP_IPV6;
}
INP_HASH_WLOCK(pcbinfo);
- error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport,
+ error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport, NULL,
td->td_ucred);
INP_HASH_WUNLOCK(pcbinfo);
if ((flags & PRUS_IPV6) != 0)
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index f1407b7ea237..4b6c1ae2a2c9 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -520,7 +520,7 @@ int solisten_proto_check(struct socket *so);
bool solisten_enqueue(struct socket *, int);
int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *
- solisten_clone(struct socket *);
+ solisten_clone(struct socket *, int fibnum);
struct socket *
sonewconn(struct socket *head, int connstatus);
struct socket *