The branch main has been updated by glebius:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=607f11055d2d421770963162a4d9a99cdd136152

commit 607f11055d2d421770963162a4d9a99cdd136152
Author:     Gleb Smirnoff <[email protected]>
AuthorDate: 2025-12-08 17:20:22 +0000
Commit:     Gleb Smirnoff <[email protected]>
CommitDate: 2025-12-08 17:20:22 +0000

    linux: store Linux Ethernet interface number in struct ifnet
    
    The old approach where we go through the list of interfaces and count them
    has bugs.  One obvious bug with this dynamic translation is that once an
    Ethernet interface in the middle of the list goes away, all interfaces
    following it would change their Linux names.
    
    A bigger problem is the ifnet arrival and departure times.  For example,
    linsysfs has an event handler for ifnet_arrival_event, and of course it
    wants to resolve the name.  This accidentally works, due to a bug in
    if_attach() where we call if_link_ifnet() before invoking all the event
    handlers.  Once the bug is fixed, linsysfs won't be able to resolve the
    name the old way.  The other side is ifnet_departure_event, where there is
    no bug: the event handlers are called after if_unlink_ifnet().  This means
    the old translation won't work for departure event handlers.  One example
    is netlink.  This change gives Netlink a chance to emit a proper Linux
    interface departure message.
    
    However, there is another problem in Netlink: the ifnet pointer is
    lost in the Netlink translation layer.  Plug this with a cookie in the
    netlink writer structure that can be set by the route layer and used by
    the Netlink Linux translation layer.  This part of the diff seems
    unrelated, but it is hard to make it a separate change, as the old KPI
    goes away and to use the new one we need the pointer.
    
    Differential Revision:  https://reviews.freebsd.org/D54077
---
 sys/compat/linsysfs/linsysfs_net.c   |  12 ++--
 sys/compat/linux/linux.h             |   2 +
 sys/compat/linux/linux_common.c      |   2 +
 sys/compat/linux/linux_common.h      |   5 +-
 sys/compat/linux/linux_if.c          | 135 ++++++++++++++++++-----------------
 sys/compat/linux/linux_netlink.c     |   7 +-
 sys/net/if_private.h                 |   1 +
 sys/netlink/netlink_io.c             |   2 +-
 sys/netlink/netlink_linux.h          |   3 +-
 sys/netlink/netlink_message_writer.h |   2 +
 sys/netlink/route/iface.c            |   2 +
 11 files changed, 95 insertions(+), 78 deletions(-)

diff --git a/sys/compat/linsysfs/linsysfs_net.c 
b/sys/compat/linsysfs/linsysfs_net.c
index 751dbb5b3713..7439b0b4fdc0 100644
--- a/sys/compat/linsysfs/linsysfs_net.c
+++ b/sys/compat/linsysfs/linsysfs_net.c
@@ -90,7 +90,7 @@ linsysfs_if_addr(PFS_FILL_ARGS)
 
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name);
+       ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name);
        if (ifp != NULL && (error = linux_ifhwaddr(ifp, &lsa)) == 0)
                error = sbuf_printf(sb, 
"%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n",
                    lsa.sa_data[0], lsa.sa_data[1], lsa.sa_data[2],
@@ -119,7 +119,7 @@ linsysfs_if_flags(PFS_FILL_ARGS)
 
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name);
+       ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name);
        if (ifp != NULL)
                error = sbuf_printf(sb, "0x%x\n", linux_ifflags(ifp));
        else
@@ -138,7 +138,7 @@ linsysfs_if_ifindex(PFS_FILL_ARGS)
 
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name);
+       ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name);
        if (ifp != NULL)
                error = sbuf_printf(sb, "%u\n", if_getindex(ifp));
        else
@@ -157,7 +157,7 @@ linsysfs_if_mtu(PFS_FILL_ARGS)
 
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name);
+       ifp = ifname_linux_to_ifp( pn->pn_parent->pn_name);
        if (ifp != NULL)
                error = sbuf_printf(sb, "%u\n", if_getmtu(ifp));
        else
@@ -186,7 +186,7 @@ linsysfs_if_type(PFS_FILL_ARGS)
 
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, pn->pn_parent->pn_name);
+       ifp = ifname_linux_to_ifp(pn->pn_parent->pn_name);
        if (ifp != NULL && (error = linux_ifhwaddr(ifp, &lsa)) == 0)
                error = sbuf_printf(sb, "%d\n", lsa.sa_family);
        else
@@ -207,7 +207,7 @@ linsysfs_if_visible(PFS_VIS_ARGS)
        visible = 0;
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, pn->pn_name);
+       ifp = ifname_linux_to_ifp(pn->pn_name);
        if (ifp != NULL) {
                TAILQ_FOREACH_SAFE(nq, &ifp_nodes_q, ifp_nodes_next, nq_tmp) {
                        if (nq->ifp == ifp && nq->vnet == curvnet) {
diff --git a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h
index 625aee2be127..67acd726a503 100644
--- a/sys/compat/linux/linux.h
+++ b/sys/compat/linux/linux.h
@@ -359,6 +359,8 @@ struct l_statx {
        ktrstruct("l_sigset_t", (s), l)
 #endif
 
+void linux_ifnet_init(void);
+void linux_ifnet_uninit(void);
 void linux_netlink_register(void);
 void linux_netlink_deregister(void);
 
diff --git a/sys/compat/linux/linux_common.c b/sys/compat/linux/linux_common.c
index e22e29ff2b24..d39054a647f7 100644
--- a/sys/compat/linux/linux_common.c
+++ b/sys/compat/linux/linux_common.c
@@ -56,6 +56,7 @@ linux_common_modevent(module_t mod, int type, void *data)
                linux_osd_jail_register();
                SET_FOREACH(ldhp, linux_device_handler_set)
                        linux_device_register_handler(*ldhp);
+               linux_ifnet_init();
                linux_netlink_register();
                break;
        case MOD_UNLOAD:
@@ -63,6 +64,7 @@ linux_common_modevent(module_t mod, int type, void *data)
                linux_osd_jail_deregister();
                SET_FOREACH(ldhp, linux_device_handler_set)
                        linux_device_unregister_handler(*ldhp);
+               linux_ifnet_uninit();
                linux_netlink_deregister();
                break;
        default:
diff --git a/sys/compat/linux/linux_common.h b/sys/compat/linux/linux_common.h
index 814c183b338a..44ba63c44278 100644
--- a/sys/compat/linux/linux_common.h
+++ b/sys/compat/linux/linux_common.h
@@ -28,10 +28,9 @@
 #ifndef _LINUX_COMMON_H_
 #define _LINUX_COMMON_H_
 
-int    ifname_bsd_to_linux_ifp(struct ifnet *, char *, size_t);
+int    ifname_bsd_to_linux_ifp(const struct ifnet *, char *, size_t);
 int    ifname_bsd_to_linux_idx(u_int, char *, size_t);
-int    ifname_bsd_to_linux_name(const char *, char *, size_t);
-struct ifnet *ifname_linux_to_ifp(struct thread *, const char *);
+struct ifnet *ifname_linux_to_ifp( const char *);
 int    ifname_linux_to_bsd(struct thread *, const char *, char *);
 
 unsigned short linux_ifflags(struct ifnet *);
diff --git a/sys/compat/linux/linux_if.c b/sys/compat/linux/linux_if.c
index 29e86d71aa5a..7c55ab9fd49c 100644
--- a/sys/compat/linux/linux_if.c
+++ b/sys/compat/linux/linux_if.c
@@ -24,7 +24,9 @@
  */
 
 #include <sys/param.h>
+#include <sys/systm.h>
 #include <sys/ctype.h>
+#include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
@@ -32,6 +34,8 @@
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
+#include <net/if_private.h>
+#include <net/vnet.h>
 
 #include <compat/linux/linux.h>
 #include <compat/linux/linux_common.h>
@@ -44,33 +48,67 @@ SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, 
CTLFLAG_RWTUN,
     &use_real_ifnames, 0,
     "Use FreeBSD interface names instead of generating ethN aliases");
 
-/*
- * Criteria for interface name translation
- */
-#define        IFP_IS_ETH(ifp)         (if_gettype(ifp) == IFT_ETHER)
-#define        IFP_IS_LOOP(ifp)        (if_gettype(ifp) == IFT_LOOP)
+VNET_DEFINE_STATIC(struct unrhdr *, linux_eth_unr);
+#define        V_linux_eth_unr VNET(linux_eth_unr)
 
-/*
- * Translate a FreeBSD interface name to a Linux interface name
- * by interface name, and return the number of bytes copied to lxname.
- */
-int
-ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
+static eventhandler_tag ifnet_arrival_tag;
+static eventhandler_tag ifnet_departure_tag;
+
+static void
+linux_ifnet_arrival(void *arg __unused, struct ifnet *ifp)
 {
-       struct epoch_tracker et;
-       struct ifnet *ifp;
-       int ret;
+       if (ifp->if_type == IFT_ETHER)
+               ifp->if_linux_ethno = alloc_unr(V_linux_eth_unr);
+}
 
-       CURVNET_ASSERT_SET();
+static void
+linux_ifnet_departure(void *arg __unused, struct ifnet *ifp)
+{
+       if (ifp->if_type == IFT_ETHER)
+               free_unr(V_linux_eth_unr, ifp->if_linux_ethno);
+}
 
-       ret = 0;
+void
+linux_ifnet_init(void)
+{
+       ifnet_arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
+           linux_ifnet_arrival, NULL, EVENTHANDLER_PRI_FIRST);
+       ifnet_departure_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+           linux_ifnet_departure, NULL, EVENTHANDLER_PRI_LAST);
+}
+
+void
+linux_ifnet_uninit(void)
+{
+       EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifnet_arrival_tag);
+       EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifnet_departure_tag);
+}
+
+static void
+linux_ifnet_vnet_init(void *arg __unused)
+{
+       struct epoch_tracker et;
+       struct if_iter it;
+       if_t ifp;
+
+       V_linux_eth_unr = new_unrhdr(0, INT_MAX, NULL);
        NET_EPOCH_ENTER(et);
-       ifp = ifunit(bsdname);
-       if (ifp != NULL)
-               ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
+       for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it))
+               linux_ifnet_arrival(NULL, ifp);
        NET_EPOCH_EXIT(et);
-       return (ret);
 }
+VNET_SYSINIT(linux_ifnet_vnet_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+    linux_ifnet_vnet_init, NULL);
+
+#ifdef VIMAGE
+static void
+linux_ifnet_vnet_uninit(void *arg __unused)
+{
+       delete_unrhdr(V_linux_eth_unr);
+}
+VNET_SYSUNINIT(linux_ifnet_vnet_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+    linux_ifnet_vnet_uninit, NULL);
+#endif
 
 /*
  * Translate a FreeBSD interface name to a Linux interface name
@@ -99,50 +137,23 @@ ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t 
len)
  * and return the number of bytes copied to lxname, 0 if interface
  * not found, -1 on error.
  */
-struct ifname_bsd_to_linux_ifp_cb_s {
-       struct ifnet    *ifp;
-       int             ethno;
-       char            *lxname;
-       size_t          len;
-};
-
-static int
-ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
-{
-       struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
-
-       if (ifp == cbs->ifp)
-               return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
-       if (IFP_IS_ETH(ifp))
-               cbs->ethno++;
-       return (0);
-}
-
 int
-ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
+ifname_bsd_to_linux_ifp(const struct ifnet *ifp, char *lxname, size_t len)
 {
-       struct ifname_bsd_to_linux_ifp_cb_s arg = {
-               .ifp = ifp,
-               .ethno = 0,
-               .lxname = lxname,
-               .len = len,
-       };
-
-       NET_EPOCH_ASSERT();
-
        /*
         * Linux loopback interface name is lo (not lo0),
         * we translate lo to lo0, loX to loX.
         */
-       if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
+       if (ifp->if_type == IFT_LOOP &&
+           strncmp(ifp->if_xname, "lo0", IFNAMSIZ) == 0)
                return (strlcpy(lxname, "lo", len));
 
        /* Short-circuit non ethernet interfaces. */
-       if (!IFP_IS_ETH(ifp) || use_real_ifnames)
-               return (strlcpy(lxname, if_name(ifp), len));
+       if (ifp->if_type != IFT_ETHER || use_real_ifnames)
+               return (strlcpy(lxname, ifp->if_xname, len));
 
        /* Determine the (relative) unit number for ethernet interfaces. */
-       return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
+       return (snprintf(lxname, len, "eth%d", ifp->if_linux_ethno));
 }
 
 /*
@@ -154,7 +165,6 @@ ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, 
size_t len)
 struct ifname_linux_to_ifp_cb_s {
        bool            is_lo;
        bool            is_eth;
-       int             ethno;
        int             unit;
        const char      *lxname;
        if_t            ifp;
@@ -174,12 +184,11 @@ ifname_linux_to_ifp_cb(if_t ifp, void *arg)
         */
        if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
                goto out;
-       if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
+       if (cbs->is_eth && ifp->if_type == IFT_ETHER &&
+           ifp->if_linux_ethno == cbs->unit)
                goto out;
-       if (cbs->is_lo && IFP_IS_LOOP(ifp))
+       if (cbs->is_lo && ifp->if_type == IFT_LOOP)
                goto out;
-       if (IFP_IS_ETH(ifp))
-               cbs->ethno++;
        return (0);
 
 out:
@@ -188,12 +197,10 @@ out:
 }
 
 struct ifnet *
-ifname_linux_to_ifp(struct thread *td, const char *lxname)
+ifname_linux_to_ifp(const char *lxname)
 {
        struct ifname_linux_to_ifp_cb_s arg = {
-               .ethno = 0,
                .lxname = lxname,
-               .ifp = NULL,
        };
        int len;
        char *ep;
@@ -228,7 +235,7 @@ ifname_linux_to_bsd(struct thread *td, const char *lxname, 
char *bsdname)
 
        CURVNET_SET(TD_TO_VNET(td));
        NET_EPOCH_ENTER(et);
-       ifp = ifname_linux_to_ifp(td, lxname);
+       ifp = ifname_linux_to_ifp(lxname);
        if (ifp != NULL && bsdname != NULL)
                strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
        NET_EPOCH_EXIT(et);
@@ -297,12 +304,12 @@ linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
 
        NET_EPOCH_ASSERT();
 
-       if (IFP_IS_LOOP(ifp)) {
+       if (ifp->if_type == IFT_LOOP) {
                bzero(lsa, sizeof(*lsa));
                lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
                return (0);
        }
-       if (!IFP_IS_ETH(ifp))
+       if (ifp->if_type != IFT_ETHER)
                return (ENOENT);
        if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
                return (0);
diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c
index 6aeafe84adc6..6dd2ad7ad8b0 100644
--- a/sys/compat/linux/linux_netlink.c
+++ b/sys/compat/linux/linux_netlink.c
@@ -249,9 +249,9 @@ nlmsg_translate_ifname_nla(struct nlattr *nla, struct 
nl_writer *nw)
 {
        char ifname[LINUX_IFNAMSIZ];
 
-       if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
-           sizeof(ifname)) <= 0)
+       if (nw->ifp == NULL)
                return (false);
+       (void)ifname_bsd_to_linux_ifp(nw->ifp, ifname, sizeof(ifname));
        return (nlattr_add_string(nw, IFLA_IFNAME, ifname));
 }
 
@@ -564,7 +564,7 @@ nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 
struct nl_writer *nw)
 }
 
 static struct nl_buf *
-nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp)
+nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp, const struct ifnet 
*ifp)
 {
        struct nl_writer nw;
        u_int offset, msglen;
@@ -573,6 +573,7 @@ nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp)
            orig->datalen + SCRATCH_BUFFER_SIZE, nlp, false)))
                return (NULL);
 
+       nw.ifp = ifp;
        /* Assume correct headers. Buffer IS mutable */
        for (offset = 0;
            offset + sizeof(struct nlmsghdr) <= orig->datalen;
diff --git a/sys/net/if_private.h b/sys/net/if_private.h
index 3da529e6b22e..b8cd0722eba6 100644
--- a/sys/net/if_private.h
+++ b/sys/net/if_private.h
@@ -65,6 +65,7 @@ struct ifnet {
        void    *if_linkmib;            /* link-type-specific MIB data */
        size_t  if_linkmiblen;          /* length of above data */
        u_int   if_refcount;            /* reference count */
+       u_int   if_linux_ethno;         /* linux name id for IFT_ETHER */
 
        /* These fields are shared with struct if_data. */
        uint8_t         if_type;        /* ethernet, tokenring, etc */
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
index 2391d8ea752c..882c2181d24f 100644
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -217,7 +217,7 @@ nl_send(struct nl_writer *nw, struct nlpcb *nlp)
        }
 
        if (nlp->nl_linux && linux_netlink_p != NULL) {
-               nb = linux_netlink_p->msgs_to_linux(nw->buf, nlp);
+               nb = linux_netlink_p->msgs_to_linux(nw->buf, nlp, nw->ifp);
                nl_buf_free(nw->buf);
                nw->buf = NULL;
                if (nb == NULL)
diff --git a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h
index 794065692901..e9a25a04bb2b 100644
--- a/sys/netlink/netlink_linux.h
+++ b/sys/netlink/netlink_linux.h
@@ -37,7 +37,8 @@ struct nlpcb;
 struct nl_pstate;
 struct nl_writer;
 
-typedef struct nl_buf * msgs_to_linux_cb_t(struct nl_buf *, struct nlpcb *);
+typedef struct nl_buf * msgs_to_linux_cb_t(struct nl_buf *, struct nlpcb *,
+                           const struct ifnet *);
 typedef int msg_from_linux_cb_t(int netlink_family, struct nlmsghdr **hdr,
     struct nl_pstate *npt);
 
diff --git a/sys/netlink/netlink_message_writer.h 
b/sys/netlink/netlink_message_writer.h
index ad2099a4d636..be3f349ce9f6 100644
--- a/sys/netlink/netlink_message_writer.h
+++ b/sys/netlink/netlink_message_writer.h
@@ -39,6 +39,7 @@
 
 struct nl_buf;
 struct nl_writer;
+struct ifnet;
 typedef bool nl_writer_cb(struct nl_writer *nw);
 
 struct nl_writer {
@@ -53,6 +54,7 @@ struct nl_writer {
                        int             priv;
                } group;
        };
+       const struct ifnet *ifp;        /* Used by Linux translation only */
        u_int           num_messages;   /* Number of messages in the buffer */
        int             malloc_flag;    /* M_WAITOK or M_NOWAIT */
        bool            ignore_limit;   /* If true, ignores RCVBUF limit */
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c
index 9beb80792af4..70ec5e688c57 100644
--- a/sys/netlink/route/iface.c
+++ b/sys/netlink/route/iface.c
@@ -363,6 +363,8 @@ dump_iface(struct nl_writer *nw, if_t ifp, const struct 
nlmsghdr *hdr,
 
        ifc_dump_ifp_nl(ifp, nw);
 
+       nw->ifp = ifp;
+
         if (nlmsg_end(nw))
                return (true);
 

Reply via email to