The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=607f11055d2d421770963162a4d9a99cdd136152
commit 607f11055d2d421770963162a4d9a99cdd136152 Author: Gleb Smirnoff <[email protected]> AuthorDate: 2025-12-08 17:20:22 +0000 Commit: Gleb Smirnoff <[email protected]> CommitDate: 2025-12-08 17:20:22 +0000 linux: store Linux Ethernet interface number in struct ifnet The old approach where we go through the list of interfaces and count them has bugs. One obvious bug with this dynamic translation is that once an Ethernet interface in the middle of the list goes away, all interfaces following it would change their Linux names. A bigger problem is the ifnet arrival and departure times. For example, linsysfs has an event handler for ifnet_arrival_event, and of course it wants to resolve the name. This accidentally works, due to a bug in if_attach() where we call if_link_ifnet() before invoking all the event handlers. Once the bug is fixed, linsysfs won't be able to resolve the name the old way. The other side is ifnet_departure_event, where there is no bug: the event handlers are called after if_unlink_ifnet(). This means the old translation won't work for departure event handlers. One example is netlink. This change gives Netlink a chance to emit a proper Linux interface departure message. However, there is another problem in Netlink: the ifnet pointer is lost in the Netlink translation layer. Plug this with a cookie in the netlink writer structure that can be set by the route layer and used by the Netlink Linux translation layer. This part of the diff seems unrelated, but it is hard to make it a separate change, as the old KPI goes away and to use the new one we need the pointer. 
Differential Revision: https://reviews.freebsd.org/D54077 --- sys/compat/linsysfs/linsysfs_net.c | 12 ++-- sys/compat/linux/linux.h | 2 + sys/compat/linux/linux_common.c | 2 + sys/compat/linux/linux_common.h | 5 +- sys/compat/linux/linux_if.c | 135 ++++++++++++++++++----------------- sys/compat/linux/linux_netlink.c | 7 +- sys/net/if_private.h | 1 + sys/netlink/netlink_io.c | 2 +- sys/netlink/netlink_linux.h | 3 +- sys/netlink/netlink_message_writer.h | 2 + sys/netlink/route/iface.c | 2 + 11 files changed, 95 insertions(+), 78 deletions(-) diff --git a/sys/compat/linsysfs/linsysfs_net.c b/sys/compat/linsysfs/linsysfs_net.c index 751dbb5b3713..7439b0b4fdc0 100644 --- a/sys/compat/linsysfs/linsysfs_net.c +++ b/sys/compat/linsysfs/linsysfs_net.c @@ -90,7 +90,7 @@ linsysfs_if_addr(PFS_FILL_ARGS) CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name); + ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name); if (ifp != NULL && (error = linux_ifhwaddr(ifp, &lsa)) == 0) error = sbuf_printf(sb, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n", lsa.sa_data[0], lsa.sa_data[1], lsa.sa_data[2], @@ -119,7 +119,7 @@ linsysfs_if_flags(PFS_FILL_ARGS) CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name); + ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name); if (ifp != NULL) error = sbuf_printf(sb, "0x%x\n", linux_ifflags(ifp)); else @@ -138,7 +138,7 @@ linsysfs_if_ifindex(PFS_FILL_ARGS) CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name); + ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name); if (ifp != NULL) error = sbuf_printf(sb, "%u\n", if_getindex(ifp)); else @@ -157,7 +157,7 @@ linsysfs_if_mtu(PFS_FILL_ARGS) CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name); + ifp = ifname_linux_to_ifp( pn->pn_parent->pn_name); if (ifp != NULL) error = sbuf_printf(sb, "%u\n", if_getmtu(ifp)); 
else @@ -186,7 +186,7 @@ linsysfs_if_type(PFS_FILL_ARGS) CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name); + ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name); if (ifp != NULL && (error = linux_ifhwaddr(ifp, &lsa)) == 0) error = sbuf_printf(sb, "%d\n", lsa.sa_family); else @@ -207,7 +207,7 @@ linsysfs_if_visible(PFS_VIS_ARGS) visible = 0; CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, pn->pn_name); + ifp = ifname_linux_to_ifp(pn->pn_name); if (ifp != NULL) { TAILQ_FOREACH_SAFE(nq, &ifp_nodes_q, ifp_nodes_next, nq_tmp) { if (nq->ifp == ifp && nq->vnet == curvnet) { diff --git a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h index 625aee2be127..67acd726a503 100644 --- a/sys/compat/linux/linux.h +++ b/sys/compat/linux/linux.h @@ -359,6 +359,8 @@ struct l_statx { ktrstruct("l_sigset_t", (s), l) #endif +void linux_ifnet_init(void); +void linux_ifnet_uninit(void); void linux_netlink_register(void); void linux_netlink_deregister(void); diff --git a/sys/compat/linux/linux_common.c b/sys/compat/linux/linux_common.c index e22e29ff2b24..d39054a647f7 100644 --- a/sys/compat/linux/linux_common.c +++ b/sys/compat/linux/linux_common.c @@ -56,6 +56,7 @@ linux_common_modevent(module_t mod, int type, void *data) linux_osd_jail_register(); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_register_handler(*ldhp); + linux_ifnet_init(); linux_netlink_register(); break; case MOD_UNLOAD: @@ -63,6 +64,7 @@ linux_common_modevent(module_t mod, int type, void *data) linux_osd_jail_deregister(); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_unregister_handler(*ldhp); + linux_ifnet_uninit(); linux_netlink_deregister(); break; default: diff --git a/sys/compat/linux/linux_common.h b/sys/compat/linux/linux_common.h index 814c183b338a..44ba63c44278 100644 --- a/sys/compat/linux/linux_common.h +++ b/sys/compat/linux/linux_common.h @@ -28,10 +28,9 @@ #ifndef _LINUX_COMMON_H_ #define 
_LINUX_COMMON_H_ -int ifname_bsd_to_linux_ifp(struct ifnet *, char *, size_t); +int ifname_bsd_to_linux_ifp(const struct ifnet *, char *, size_t); int ifname_bsd_to_linux_idx(u_int, char *, size_t); -int ifname_bsd_to_linux_name(const char *, char *, size_t); -struct ifnet *ifname_linux_to_ifp(struct thread *, const char *); +struct ifnet *ifname_linux_to_ifp( const char *); int ifname_linux_to_bsd(struct thread *, const char *, char *); unsigned short linux_ifflags(struct ifnet *); diff --git a/sys/compat/linux/linux_if.c b/sys/compat/linux/linux_if.c index 29e86d71aa5a..7c55ab9fd49c 100644 --- a/sys/compat/linux/linux_if.c +++ b/sys/compat/linux/linux_if.c @@ -24,7 +24,9 @@ */ #include <sys/param.h> +#include <sys/systm.h> #include <sys/ctype.h> +#include <sys/eventhandler.h> #include <sys/jail.h> #include <sys/socket.h> #include <sys/sysctl.h> @@ -32,6 +34,8 @@ #include <net/if_dl.h> #include <net/if_types.h> #include <net/if_var.h> +#include <net/if_private.h> +#include <net/vnet.h> #include <compat/linux/linux.h> #include <compat/linux/linux_common.h> @@ -44,33 +48,67 @@ SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN, &use_real_ifnames, 0, "Use FreeBSD interface names instead of generating ethN aliases"); -/* - * Criteria for interface name translation - */ -#define IFP_IS_ETH(ifp) (if_gettype(ifp) == IFT_ETHER) -#define IFP_IS_LOOP(ifp) (if_gettype(ifp) == IFT_LOOP) +VNET_DEFINE_STATIC(struct unrhdr *, linux_eth_unr); +#define V_linux_eth_unr VNET(linux_eth_unr) -/* - * Translate a FreeBSD interface name to a Linux interface name - * by interface name, and return the number of bytes copied to lxname. 
- */ -int -ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len) +static eventhandler_tag ifnet_arrival_tag; +static eventhandler_tag ifnet_departure_tag; + +static void +linux_ifnet_arrival(void *arg __unused, struct ifnet *ifp) { - struct epoch_tracker et; - struct ifnet *ifp; - int ret; + if (ifp->if_type == IFT_ETHER) + ifp->if_linux_ethno = alloc_unr(V_linux_eth_unr); +} - CURVNET_ASSERT_SET(); +static void +linux_ifnet_departure(void *arg __unused, struct ifnet *ifp) +{ + if (ifp->if_type == IFT_ETHER) + free_unr(V_linux_eth_unr, ifp->if_linux_ethno); +} - ret = 0; +void +linux_ifnet_init(void) +{ + ifnet_arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, + linux_ifnet_arrival, NULL, EVENTHANDLER_PRI_FIRST); + ifnet_departure_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, + linux_ifnet_departure, NULL, EVENTHANDLER_PRI_LAST); +} + +void +linux_ifnet_uninit(void) +{ + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifnet_arrival_tag); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifnet_departure_tag); +} + +static void +linux_ifnet_vnet_init(void *arg __unused) +{ + struct epoch_tracker et; + struct if_iter it; + if_t ifp; + + V_linux_eth_unr = new_unrhdr(0, INT_MAX, NULL); NET_EPOCH_ENTER(et); - ifp = ifunit(bsdname); - if (ifp != NULL) - ret = ifname_bsd_to_linux_ifp(ifp, lxname, len); + for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) + linux_ifnet_arrival(NULL, ifp); NET_EPOCH_EXIT(et); - return (ret); } +VNET_SYSINIT(linux_ifnet_vnet_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, + linux_ifnet_vnet_init, NULL); + +#ifdef VIMAGE +static void +linux_ifnet_vnet_uninit(void *arg __unused) +{ + delete_unrhdr(V_linux_eth_unr); +} +VNET_SYSUNINIT(linux_ifnet_vnet_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, + linux_ifnet_vnet_uninit, NULL); +#endif /* * Translate a FreeBSD interface name to a Linux interface name @@ -99,50 +137,23 @@ ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len) * and return the number of bytes 
copied to lxname, 0 if interface * not found, -1 on error. */ -struct ifname_bsd_to_linux_ifp_cb_s { - struct ifnet *ifp; - int ethno; - char *lxname; - size_t len; -}; - -static int -ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg) -{ - struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg; - - if (ifp == cbs->ifp) - return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno)); - if (IFP_IS_ETH(ifp)) - cbs->ethno++; - return (0); -} - int -ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len) +ifname_bsd_to_linux_ifp(const struct ifnet *ifp, char *lxname, size_t len) { - struct ifname_bsd_to_linux_ifp_cb_s arg = { - .ifp = ifp, - .ethno = 0, - .lxname = lxname, - .len = len, - }; - - NET_EPOCH_ASSERT(); - /* * Linux loopback interface name is lo (not lo0), * we translate lo to lo0, loX to loX. */ - if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0) + if (ifp->if_type == IFT_LOOP && + strncmp(ifp->if_xname, "lo0", IFNAMSIZ) == 0) return (strlcpy(lxname, "lo", len)); /* Short-circuit non ethernet interfaces. */ - if (!IFP_IS_ETH(ifp) || use_real_ifnames) - return (strlcpy(lxname, if_name(ifp), len)); + if (ifp->if_type != IFT_ETHER || use_real_ifnames) + return (strlcpy(lxname, ifp->if_xname, len)); /* Determine the (relative) unit number for ethernet interfaces. 
*/ - return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg)); + return (snprintf(lxname, len, "eth%d", ifp->if_linux_ethno)); } /* @@ -154,7 +165,6 @@ ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len) struct ifname_linux_to_ifp_cb_s { bool is_lo; bool is_eth; - int ethno; int unit; const char *lxname; if_t ifp; @@ -174,12 +184,11 @@ ifname_linux_to_ifp_cb(if_t ifp, void *arg) */ if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0) goto out; - if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno) + if (cbs->is_eth && ifp->if_type == IFT_ETHER && + ifp->if_linux_ethno == cbs->unit) goto out; - if (cbs->is_lo && IFP_IS_LOOP(ifp)) + if (cbs->is_lo && ifp->if_type == IFT_LOOP) goto out; - if (IFP_IS_ETH(ifp)) - cbs->ethno++; return (0); out: @@ -188,12 +197,10 @@ out: } struct ifnet * -ifname_linux_to_ifp(struct thread *td, const char *lxname) +ifname_linux_to_ifp(const char *lxname) { struct ifname_linux_to_ifp_cb_s arg = { - .ethno = 0, .lxname = lxname, - .ifp = NULL, }; int len; char *ep; @@ -228,7 +235,7 @@ ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname) CURVNET_SET(TD_TO_VNET(td)); NET_EPOCH_ENTER(et); - ifp = ifname_linux_to_ifp(td, lxname); + ifp = ifname_linux_to_ifp(lxname); if (ifp != NULL && bsdname != NULL) strlcpy(bsdname, if_name(ifp), IFNAMSIZ); NET_EPOCH_EXIT(et); @@ -297,12 +304,12 @@ linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa) NET_EPOCH_ASSERT(); - if (IFP_IS_LOOP(ifp)) { + if (ifp->if_type == IFT_LOOP) { bzero(lsa, sizeof(*lsa)); lsa->sa_family = LINUX_ARPHRD_LOOPBACK; return (0); } - if (!IFP_IS_ETH(ifp)) + if (ifp->if_type != IFT_ETHER) return (ENOENT); if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0) return (0); diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c index 6aeafe84adc6..6dd2ad7ad8b0 100644 --- a/sys/compat/linux/linux_netlink.c +++ b/sys/compat/linux/linux_netlink.c @@ -249,9 +249,9 @@ 
nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) { char ifname[LINUX_IFNAMSIZ]; - if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, - sizeof(ifname)) <= 0) + if (nw->ifp == NULL) return (false); + (void)ifname_bsd_to_linux_ifp(nw->ifp, ifname, sizeof(ifname)); return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); } @@ -564,7 +564,7 @@ nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) } static struct nl_buf * -nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp) +nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp, const struct ifnet *ifp) { struct nl_writer nw; u_int offset, msglen; @@ -573,6 +573,7 @@ nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp) orig->datalen + SCRATCH_BUFFER_SIZE, nlp, false))) return (NULL); + nw.ifp = ifp; /* Assume correct headers. Buffer IS mutable */ for (offset = 0; offset + sizeof(struct nlmsghdr) <= orig->datalen; diff --git a/sys/net/if_private.h b/sys/net/if_private.h index 3da529e6b22e..b8cd0722eba6 100644 --- a/sys/net/if_private.h +++ b/sys/net/if_private.h @@ -65,6 +65,7 @@ struct ifnet { void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ u_int if_refcount; /* reference count */ + u_int if_linux_ethno; /* linux name id for IFT_ETHER */ /* These fields are shared with struct if_data. 
*/ uint8_t if_type; /* ethernet, tokenring, etc */ diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c index 2391d8ea752c..882c2181d24f 100644 --- a/sys/netlink/netlink_io.c +++ b/sys/netlink/netlink_io.c @@ -217,7 +217,7 @@ nl_send(struct nl_writer *nw, struct nlpcb *nlp) } if (nlp->nl_linux && linux_netlink_p != NULL) { - nb = linux_netlink_p->msgs_to_linux(nw->buf, nlp); + nb = linux_netlink_p->msgs_to_linux(nw->buf, nlp, nw->ifp); nl_buf_free(nw->buf); nw->buf = NULL; if (nb == NULL) diff --git a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h index 794065692901..e9a25a04bb2b 100644 --- a/sys/netlink/netlink_linux.h +++ b/sys/netlink/netlink_linux.h @@ -37,7 +37,8 @@ struct nlpcb; struct nl_pstate; struct nl_writer; -typedef struct nl_buf * msgs_to_linux_cb_t(struct nl_buf *, struct nlpcb *); +typedef struct nl_buf * msgs_to_linux_cb_t(struct nl_buf *, struct nlpcb *, + const struct ifnet *); typedef int msg_from_linux_cb_t(int netlink_family, struct nlmsghdr **hdr, struct nl_pstate *npt); diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h index ad2099a4d636..be3f349ce9f6 100644 --- a/sys/netlink/netlink_message_writer.h +++ b/sys/netlink/netlink_message_writer.h @@ -39,6 +39,7 @@ struct nl_buf; struct nl_writer; +struct ifnet; typedef bool nl_writer_cb(struct nl_writer *nw); struct nl_writer { @@ -53,6 +54,7 @@ struct nl_writer { int priv; } group; }; + const struct ifnet *ifp; /* Used by Linux translation only */ u_int num_messages; /* Number of messages in the buffer */ int malloc_flag; /* M_WAITOK or M_NOWAIT */ bool ignore_limit; /* If true, ignores RCVBUF limit */ diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index 9beb80792af4..70ec5e688c57 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -363,6 +363,8 @@ dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr, ifc_dump_ifp_nl(ifp, nw); + nw->ifp = ifp; + if (nlmsg_end(nw)) return 
(true);
