Hi,

This diff serializes tun(4)/tap(4) interfaces creation and destruction
from ifconfig(8) and device nodes path. Not only simplifies, but fixes
syzkaller [1] report.

Can I ask openvpn or vmd(8) users to test it and prove that there is no
behavior changes?

1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9

Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
diff -u -p -r1.726 if.c
--- sys/net/if.c        3 Feb 2025 08:58:52 -0000       1.726
+++ sys/net/if.c        12 Feb 2025 15:35:04 -0000
@@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0,
  * Create a clone network interface.
  */
 int
-if_clone_create(const char *name, int rdomain)
+if_clone_create_locked(const char *name, int rdomain)
 {
        struct if_clone *ifc;
        struct ifnet *ifp;
        int unit, ret;
 
+       rw_assert_wrlock(&if_cloners_lock);
+
        ifc = if_clone_lookup(name, &unit);
        if (ifc == NULL)
                return (EINVAL);
-
-       rw_enter_write(&if_cloners_lock);
-
        if ((ifp = if_unit(name)) != NULL) {
                ret = EEXIST;
-               goto unlock;
+               goto put;
        }
 
        ret = (*ifc->ifc_create)(ifc, unit);
 
        if (ret != 0 || (ifp = if_unit(name)) == NULL)
-               goto unlock;
+               goto put;
 
        NET_LOCK();
        if_addgroup(ifp, ifc->ifc_name);
        if (rdomain != 0)
                if_setrdomain(ifp, rdomain);
        NET_UNLOCK();
-unlock:
-       rw_exit_write(&if_cloners_lock);
+put:
        if_put(ifp);
 
        return (ret);
+}
+
+int
+if_clone_create(const char *name, int rdomain)
+{
+       int error;
+
+       rw_enter_write(&if_cloners_lock);
+       error = if_clone_create_locked(name, rdomain);
+       rw_exit_write(&if_cloners_lock);
+
+       return (error);
 }
 
 /*
Index: sys/net/if_tun.c
===================================================================
RCS file: /cvs/src/sys/net/if_tun.c,v
diff -u -p -r1.250 if_tun.c
--- sys/net/if_tun.c    30 Dec 2024 02:46:00 -0000      1.250
+++ sys/net/if_tun.c    12 Feb 2025 15:35:04 -0000
@@ -115,7 +115,7 @@ int tundebug = TUN_DEBUG;
 void   tunattach(int);
 
 int    tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
-int    tun_dev_close(dev_t, struct proc *);
+int    tun_dev_close(dev_t, const struct if_clone *, struct proc *);
 int    tun_dev_ioctl(dev_t, u_long, void *);
 int    tun_dev_read(dev_t, struct uio *, int);
 int    tun_dev_write(dev_t, struct uio *, int, int);
@@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni
 
        sigio_init(&sc->sc_sigio);
 
-       /* tell tun_dev_open we're initialised */
-
-       sc->sc_flags |= TUN_INITED|TUN_STAYUP;
-       wakeup(sc);
-
        return (0);
 
 exists:
@@ -303,25 +298,10 @@ int
 tun_clone_destroy(struct ifnet *ifp)
 {
        struct tun_softc        *sc = ifp->if_softc;
-       dev_t                    dev;
 
        KERNEL_ASSERT_LOCKED();
 
-       if (ISSET(sc->sc_flags, TUN_DEAD))
-               return (ENXIO);
        SET(sc->sc_flags, TUN_DEAD);
-
-       /* kick userland off the device */
-       dev = sc->sc_dev;
-       if (dev) {
-               struct vnode *vp;
-
-               if (vfinddev(dev, VCHR, &vp))
-                       VOP_REVOKE(vp, REVOKEALL);
-
-               KASSERT(sc->sc_dev == 0);
-       }
-
        /* prevent userland from getting to the device again */
        SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
        smr_barrier();
@@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_
        struct tun_softc *sc;
        struct ifnet *ifp;
        int error;
-       u_short stayup = 0;
-       struct vnode *vp;
 
        char name[IFNAMSIZ];
        unsigned int rdomain;
 
-       /*
-        * Find the vnode associated with this open before we sleep
-        * and let something else revoke it. Our caller has a reference
-        * to it so we don't need to account for it.
-        */
-       if (!vfinddev(dev, VCHR, &vp))
-               panic("%s vfinddev failed", __func__);
-
        snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
-       rdomain = rtable_l2(p->p_p->ps_rtableid);
 
-       /* let's find or make an interface to work with */
-       while ((sc = tun_name_lookup(name)) == NULL) {
-               error = if_clone_create(name, rdomain);
+       rw_enter_write(&if_cloners_lock);
+
+       if ((ifp = if_unit(name)) == NULL) {
+               rdomain = rtable_l2(p->p_p->ps_rtableid);
+               error = if_clone_create_locked(name, rdomain);
                switch (error) {
-               case 0: /* it's probably ours */
-                       stayup = TUN_STAYUP;
-                       /* FALLTHROUGH */
-               case EEXIST: /* we may have lost a race with someone else */
+               case 0:
+               case EEXIST:
                        break;
                default:
-                       return (error);
+                       goto out;
                }
-       }
-
-       refcnt_take(&sc->sc_refs);
 
-       /* wait for it to be fully constructed before we use it */
-       for (;;) {
-               if (ISSET(sc->sc_flags, TUN_DEAD)) {
-                       error = ENXIO;
-                       goto done;
-               }
-
-               if (ISSET(sc->sc_flags, TUN_INITED))
-                       break;
-
-               error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
-               if (error != 0) {
-                       /* XXX if_clone_destroy if stayup? */
-                       goto done;
-               }
+               ifp = if_unit(name);
+               KASSERT(ifp != NULL);
        }
 
-       /* Has tun_clone_destroy torn the rug out under us? */
-       if (vp->v_type == VBAD) {
-               error = ENXIO;
-               goto done;
-       }
+       sc = ifp->if_softc;
 
-       if (sc->sc_dev != 0) {
-               /* aww, we lost */
+       if (sc->sc_dev) {
                error = EBUSY;
-               goto done;
+               goto out;
        }
-       /* it's ours now */
        sc->sc_dev = dev;
-       CLR(sc->sc_flags, stayup);
 
-       /* automatically mark the interface running on open */
-       ifp = &sc->sc_if;
        NET_LOCK();
        SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
        NET_UNLOCK();
        tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
-       error = 0;
 
-done:
-       tun_put(sc);
+out:
+       rw_exit_write(&if_cloners_lock);
+       if_put(ifp);
+
        return (error);
 }
 
-/*
- * tunclose - close the device; if closing the real device, flush pending
- *  output and unless STAYUP bring down and destroy the interface.
- */
 int
 tunclose(dev_t dev, int flag, int mode, struct proc *p)
 {
-       return (tun_dev_close(dev, p));
+       return (tun_dev_close(dev, &tun_cloner, p));
 }
 
 int
 tapclose(dev_t dev, int flag, int mode, struct proc *p)
 {
-       return (tun_dev_close(dev, p));
+       return (tun_dev_close(dev, &tap_cloner, p));
 }
 
 int
-tun_dev_close(dev_t dev, struct proc *p)
+tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p)
 {
-       struct tun_softc        *sc;
-       struct ifnet            *ifp;
-       int                      error = 0;
-       char                     name[IFNAMSIZ];
-       int                      destroy = 0;
-
-       sc = tun_get(dev);
-       if (sc == NULL)
-               return (ENXIO);
-
-       ifp = &sc->sc_if;
-
-       /*
-        * junk all pending output
-        */
-       NET_LOCK();
-       CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
-       CLR(ifp->if_capabilities, TUN_IF_CAPS);
-       NET_UNLOCK();
-       ifq_purge(&ifp->if_snd);
-
-       CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR);
-       sigio_free(&sc->sc_sigio);
-
-       if (!ISSET(sc->sc_flags, TUN_DEAD)) {
-               /* we can't hold a reference to sc before we start a dtor */
-               if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
-                       destroy = 1;
-                       strlcpy(name, ifp->if_xname, sizeof(name));
-               } else {
-                       tun_link_state(ifp, LINK_STATE_DOWN);
-               }
-       }
-
-       sc->sc_dev = 0;
-
-       tun_put(sc);
+       char name[IFNAMSIZ];
 
-       if (destroy)
-               if_clone_destroy(name);
+       snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
+       if_clone_destroy(name);
 
-       return (error);
+       return (0);
 }
 
 /*
@@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio,
 
        ifp = &sc->sc_if;
 
-       error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
-           (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
-       if (error != 0)
-               goto put;
+       while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) {
+               if (ISSET(ioflag, IO_NDELAY)) {
+                       error = EWOULDBLOCK;
+                       goto put;
+               }
+
+               sc->sc_reading = 1;
+               error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH,
+                   "tunread", INFSLP);
+               sc->sc_reading = 0;
+
+               if (error != 0)
+                       goto put;
+               if (ISSET(sc->sc_flags, TUN_DEAD)) {
+                       error = ENXIO;
+                       goto put;
+               }
+       }
 
 #if NBPFILTER > 0
        if (ifp->if_bpf)
Index: sys/net/if_tun.h
===================================================================
RCS file: /cvs/src/sys/net/if_tun.h,v
diff -u -p -r1.18 if_tun.h
--- sys/net/if_tun.h    17 Nov 2024 00:25:07 -0000      1.18
+++ sys/net/if_tun.h    12 Feb 2025 15:35:04 -0000
@@ -75,7 +75,7 @@ struct tun_hdr {
 };
 
 #define        TUN_OPEN        0x0001
-#define        TUN_INITED      0x0002
+#define        TUN_INITED      0x0002  /* unused */
 #define        TUN_RCOLL       0x0004  /* unused */
 #define        TUN_IASET       0x0008
 #define        TUN_DSTADDR     0x0010
@@ -83,7 +83,7 @@ struct tun_hdr {
 #define        TUN_ASYNC       0x0080
 #define        TUN_NBIO        0x0100
 #define TUN_BRDADDR    0x0200
-#define TUN_STAYUP     0x0400
+#define TUN_STAYUP     0x0400  /* unused */
 #define TUN_LAYER2     0x0800
 
 #define        TUN_READY       (TUN_OPEN | TUN_INITED)
Index: sys/net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
diff -u -p -r1.135 if_var.h
--- sys/net/if_var.h    24 Jan 2025 09:19:07 -0000      1.135
+++ sys/net/if_var.h    12 Feb 2025 15:35:04 -0000
@@ -325,6 +325,7 @@ int         niq_enlist(struct niqueue *, struct
     sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
 
 extern struct ifnet_head ifnetlist;
+extern struct rwlock if_cloners_lock;
 
 void   if_start(struct ifnet *);
 int    if_enqueue(struct ifnet *, struct mbuf *);
@@ -355,6 +356,7 @@ int if_isconnected(const struct ifnet *,
 
 void   if_clone_attach(struct if_clone *);
 
+int    if_clone_create_locked(const char *, int);
 int    if_clone_create(const char *, int);
 int    if_clone_destroy(const char *);
 
Index: sys/net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
diff -u -p -r1.56 ifq.c
--- sys/net/ifq.c       3 Feb 2025 08:58:52 -0000       1.56
+++ sys/net/ifq.c       12 Feb 2025 15:35:04 -0000
@@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq)
 }
 
 int
-ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
-    const char *wmesg, volatile unsigned int *sleeping,
-    volatile unsigned int *alive)
-{
-       struct mbuf *m;
-       void *cookie;
-       int error = 0;
-
-       ifq_deq_enter(ifq);
-       if (ifq->ifq_len == 0 && nbio)
-               error = EWOULDBLOCK;
-       else {
-               for (;;) {
-                       m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
-                       if (m != NULL) {
-                               ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
-                               ifq->ifq_len--;
-                               *mp = m;
-                               break;
-                       }
-
-                       (*sleeping)++;
-                       error = msleep_nsec(ifq, &ifq->ifq_mtx,
-                           priority, wmesg, INFSLP);
-                       (*sleeping)--;
-                       if (error != 0)
-                               break;
-                       if (!(*alive)) {
-                               error = EIO;
-                               break;
-                       }
-               }
-       }
-       ifq_deq_leave(ifq);
-
-       return (error);
-}
-
-int
 ifq_hdatalen(struct ifqueue *ifq)
 {
        struct mbuf *m;
Index: sys/net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
diff -u -p -r1.43 ifq.h
--- sys/net/ifq.h       3 Feb 2025 08:58:52 -0000       1.43
+++ sys/net/ifq.h       12 Feb 2025 15:35:04 -0000
@@ -446,10 +446,6 @@ void                ifq_barrier(struct ifqueue *);
 void            ifq_set_oactive(struct ifqueue *);
 void            ifq_deq_set_oactive(struct ifqueue *);
 
-int             ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
-                    const char *, volatile unsigned int *,
-                    volatile unsigned int *);
-
 #define ifq_len(_ifq)          READ_ONCE((_ifq)->ifq_len)
 #define ifq_empty(_ifq)                (ifq_len(_ifq) == 0)
 

Reply via email to