On Friday, February 21, 2025 12:35:17 AM UTC Paul Vixie wrote:
> On Thursday, February 20, 2025 4:47:41 PM UTC Mark Johnston wrote:
> > On Tue, Feb 18, 2025 at 05:16:07AM +0000, Paul Vixie wrote:
> > > this is the second fibnum patch, ...
now third, having ported the work to origin/main as of last night, which boots
as 15.0. testing of inbound tcp and udp shows that it still works as
described.
> > The handling of the second point seems incomplete: it doesn't update the
> > FIB number stored in the socket itself. Gleb and I talked a bit about
> > eliminating that field entirely so that there's only one source of
> > truth, and I think we'll eventually do that, but in the meantime,
> > in_pcbbind() needs to update so_fibnum as well.
>
> ..., i'll add the corresponding logic to in_pcbbind(), with an XXX marker.
this was also done in the attached diff.
> > This patch doesn't apply to main. Did you write it against stable/14?
>
> i've been working in a 14.2 context but i'll foreport and test.
see attached.
> thanks for engaging.
other comments, questions, complaints, or test results would be welcomed.
--
Paul Vixie
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 03bfea721dd2..d40af629b4ca 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1009,13 +1009,13 @@ SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW,
* accept(2), the protocol has two options:
* 1) Call legacy sonewconn() function, which would call protocol attach
* method, same as used for socket(2).
- * 2) Call solisten_clone(), do attach that is specific to a cloned connection,
- * and then call solisten_enqueue().
+ * 2) Call solisten_clone(), do an attach that is specific to a cloned
+ * connection, and then call solisten_enqueue().
*
* Note: the ref count on the socket is 0 on return.
*/
struct socket *
-solisten_clone(struct socket *head)
+solisten_clone(struct socket *head, int fibnum)
{
struct sbuf descrsb;
struct socket *so;
@@ -1186,7 +1186,8 @@ solisten_clone(struct socket *head)
SO_DONTROUTE | SO_LINGER | SO_OOBINLINE | SO_NOSIGPIPE);
so->so_linger = head->so_linger;
so->so_state = head->so_state;
- so->so_fibnum = head->so_fibnum;
+ /* note, C has short-circuit evaluation. */
+ so->so_fibnum = head->so_fibnum || fibnum;
so->so_proto = head->so_proto;
so->so_cred = crhold(head->so_cred);
#ifdef SOCKET_HHOOK
@@ -1232,7 +1233,7 @@ sonewconn(struct socket *head, int connstatus)
{
struct socket *so;
- if ((so = solisten_clone(head)) == NULL)
+ if ((so = solisten_clone(head, head->so_fibnum)) == NULL)
return (NULL);
if (so->so_proto->pr_attach(so, 0, NULL) != 0) {
diff --git a/sys/net/if.c b/sys/net/if.c
index ce0c3d5c0616..06cf8ba81b99 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1807,11 +1807,11 @@ sa_dl_equal(const struct sockaddr *a, const struct sockaddr *b)
}
/*
- * Locate an interface based on a complete address.
+ * Locate an interface based on a complete address,
+ * and optionally retrieve its FIB number.
*/
-/*ARGSUSED*/
struct ifaddr *
-ifa_ifwithaddr(const struct sockaddr *addr)
+ifa_ifwithaddr_getfib(const struct sockaddr *addr, uint16_t *fibnum)
{
struct ifnet *ifp;
struct ifaddr *ifa;
@@ -1836,17 +1836,23 @@ ifa_ifwithaddr(const struct sockaddr *addr)
}
ifa = NULL;
done:
+ if (ifp != NULL && fibnum != NULL)
+ *fibnum = ifp->if_fib;
return (ifa);
}
+/*
+ * Test for existence of an interface having this complete address,
+ * optionally retrieving its FIB number.
+ */
int
-ifa_ifwithaddr_check(const struct sockaddr *addr)
+ifa_ifwithaddr_check_getfib(const struct sockaddr *addr, uint16_t *fibnum)
{
struct epoch_tracker et;
int rc;
NET_EPOCH_ENTER(et);
- rc = (ifa_ifwithaddr(addr) != NULL);
+ rc = (ifa_ifwithaddr_getfib(addr, fibnum) != NULL);
NET_EPOCH_EXIT(et);
return (rc);
}
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 83d33330987e..8074042ba884 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -545,13 +545,20 @@ int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);
-struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
-int ifa_ifwithaddr_check(const struct sockaddr *);
+struct ifaddr *ifa_ifwithaddr_getfib(const struct sockaddr *, uint16_t *);
+int ifa_ifwithaddr_check_getfib(const struct sockaddr *,
+ uint16_t *);
+static inline struct ifaddr *ifa_ifwithaddr(const struct sockaddr *sa) {
+ return (ifa_ifwithaddr_getfib(sa, NULL));
+}
+static inline int ifa_ifwithaddr_check(const struct sockaddr *sa) {
+ return (ifa_ifwithaddr_check_getfib(sa, NULL));
+}
struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *,
- const struct sockaddr *, u_int);
+ const struct sockaddr *, u_int);
struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t);
int ifa_preferred(struct ifaddr *, struct ifaddr *);
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 1d9cc1866e15..899674779a23 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -735,9 +735,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, int flags,
return (EINVAL);
anonport = sin == NULL || sin->sin_port == 0;
error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr,
- &inp->inp_lport, flags, cred);
+ &inp->inp_lport, &inp->inp_inc.inc_fibnum, flags, cred);
if (error)
return (error);
+
if (in_pcbinshash(inp) != 0) {
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_lport = 0;
@@ -746,6 +747,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, int flags,
}
if (anonport)
inp->inp_flags |= INP_ANONPORT;
+ /* synchronize socket's fibnum with in_conninfo fibnum (XXX) */
+ inp->inp_socket->so_fibnum = inp->inp_inc.inc_fibnum;
return (0);
}
#endif
@@ -917,7 +920,7 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
static int
in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
const u_short lport, const int fib, int sooptions, int lookupflags,
- struct ucred *cred)
+ u_int16_t *fibnum, struct ucred *cred)
{
int reuseport, reuseport_lb;
@@ -957,7 +960,8 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
* to any endpoint address, local or not.
*/
if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((const struct sockaddr *)&sin) == 0)
+ ifa_ifwithaddr_check_getfib((const struct sockaddr *)&sin,
+ fibnum) == 0)
return (EADDRNOTAVAIL);
}
@@ -1011,11 +1015,11 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
* calling in_pcbinshash(), or they can just use the resulting
* port and address to authorise the sending of a once-off packet.
*
- * On error, the values of *laddrp and *lportp are not changed.
+ * On error, the values of *laddrp, *lportp and *fibnum are not changed.
*/
int
in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
- u_short *lportp, int flags, struct ucred *cred)
+ u_short *lportp, uint16_t *fibnum, int flags, struct ucred *cred)
{
struct socket *so = inp->inp_socket;
struct in_addr laddr;
@@ -1061,7 +1065,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp,
/* See if this address/port combo is available. */
error = in_pcbbind_avail(inp, laddr, lport, fib, sooptions,
- lookupflags, cred);
+ lookupflags, fibnum, cred);
if (error != 0)
return (error);
}
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index b0c64cfbd7ea..90e93a79fbf9 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -643,7 +643,7 @@ int in_pcballoc(struct socket *, struct inpcbinfo *);
#define INPBIND_FIB 0x0001 /* bind to the PCB's FIB only */
int in_pcbbind(struct inpcb *, struct sockaddr_in *, int, struct ucred *);
int in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *,
- u_short *, int, struct ucred *);
+ u_short *, u_int16_t *, int, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *);
int in_pcbconnect_setup(const struct inpcb *, struct sockaddr_in *,
in_addr_t *, u_short *, in_addr_t *, u_short *, struct ucred *);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index e21043fac0cf..9d80788bec8b 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -603,7 +603,7 @@ int
tcp6_input(struct mbuf **mp, int *offp, int proto)
{
- return(tcp6_input_with_port(mp, offp, proto, 0));
+ return (tcp6_input_with_port(mp, offp, proto, 0));
}
#endif /* INET6 */
@@ -1064,7 +1064,9 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
}
inc.inc_fport = th->th_sport;
inc.inc_lport = th->th_dport;
- inc.inc_fibnum = so->so_fibnum;
+
+ /* note, C has short-circuit evaluation. */
+ inc.inc_fibnum = so->so_fibnum || m->m_pkthdr.fibnum;
/*
* Check for an existing connection attempt in syncache if
@@ -1496,7 +1498,7 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
int
tcp_input(struct mbuf **mp, int *offp, int proto)
{
- return(tcp_input_with_port(mp, offp, proto, 0));
+ return (tcp_input_with_port(mp, offp, proto, 0));
}
static void
@@ -4212,7 +4214,7 @@ tcp_compute_pipe(struct tcpcb *tp)
tp->sackhint.sacked_bytes -
tp->sackhint.lost_bytes);
} else {
- return((*tp->t_fb->tfb_compute_pipe)(tp));
+ return ((*tp->t_fb->tfb_compute_pipe)(tp));
}
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 606d808676e1..f0b64edcdacc 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -794,7 +794,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
* as they would have been set up if we had created the
* connection when the SYN arrived.
*/
- if ((so = solisten_clone(lso)) == NULL)
+ if ((so = solisten_clone(lso, sc->sc_inc.inc_fibnum)) == NULL)
goto allocfail;
#ifdef MAC
mac_socketpeer_set_from_mbuf(m, so);
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 131242ce9859..9ca16f68b741 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -1261,7 +1261,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
}
INP_HASH_WLOCK(pcbinfo);
error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport,
- V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
+ NULL, V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
INP_HASH_WUNLOCK(pcbinfo);
if ((flags & PRUS_IPV6) != 0)
inp->inp_vflag = vflagsav;
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 735ff49062de..d00da58ff62f 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -520,7 +520,7 @@ int solisten_proto_check(struct socket *so);
bool solisten_enqueue(struct socket *, int);
int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *
- solisten_clone(struct socket *);
+ solisten_clone(struct socket *, int fibnum);
struct socket *
sonewconn(struct socket *head, int connstatus);
struct socket *