Hi, This diff serializes tun(4)/tap(4) interfaces creation and destruction from ifconfig(8) and device nodes path. Not only simplifies, but fixes syzkaller [1] report.
Can I ask openvpn or vmd(8) users to test it and prove that there is no behavior changes? 1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9 Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v diff -u -p -r1.726 if.c --- sys/net/if.c 3 Feb 2025 08:58:52 -0000 1.726 +++ sys/net/if.c 12 Feb 2025 15:35:04 -0000 @@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0, * Create a clone network interface. */ int -if_clone_create(const char *name, int rdomain) +if_clone_create_locked(const char *name, int rdomain) { struct if_clone *ifc; struct ifnet *ifp; int unit, ret; + rw_assert_wrlock(&if_cloners_lock); + ifc = if_clone_lookup(name, &unit); if (ifc == NULL) return (EINVAL); - - rw_enter_write(&if_cloners_lock); - if ((ifp = if_unit(name)) != NULL) { ret = EEXIST; - goto unlock; + goto put; } ret = (*ifc->ifc_create)(ifc, unit); if (ret != 0 || (ifp = if_unit(name)) == NULL) - goto unlock; + goto put; NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); if (rdomain != 0) if_setrdomain(ifp, rdomain); NET_UNLOCK(); -unlock: - rw_exit_write(&if_cloners_lock); +put: if_put(ifp); return (ret); +} + +int +if_clone_create(const char *name, int rdomain) +{ + int error; + + rw_enter_write(&if_cloners_lock); + error = if_clone_create_locked(name, rdomain); + rw_exit_write(&if_cloners_lock); + + return (error); } /* Index: sys/net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v diff -u -p -r1.250 if_tun.c --- sys/net/if_tun.c 30 Dec 2024 02:46:00 -0000 1.250 +++ sys/net/if_tun.c 12 Feb 2025 15:35:04 -0000 @@ -115,7 +115,7 @@ int tundebug = TUN_DEBUG; void tunattach(int); int tun_dev_open(dev_t, const struct if_clone *, int, struct proc *); -int tun_dev_close(dev_t, struct proc *); +int tun_dev_close(dev_t, const struct if_clone *, struct proc *); int tun_dev_ioctl(dev_t, u_long, void *); int tun_dev_read(dev_t, struct uio *, int); int tun_dev_write(dev_t, struct uio *, int, int); @@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni sigio_init(&sc->sc_sigio); - /* tell tun_dev_open we're initialised */ - - sc->sc_flags |= TUN_INITED|TUN_STAYUP; - wakeup(sc); - return (0); exists: @@ -303,25 +298,10 @@ int tun_clone_destroy(struct ifnet *ifp) { struct tun_softc *sc = ifp->if_softc; - dev_t dev; KERNEL_ASSERT_LOCKED(); - if (ISSET(sc->sc_flags, TUN_DEAD)) - return (ENXIO); SET(sc->sc_flags, TUN_DEAD); - - /* kick userland off the device */ - dev = sc->sc_dev; - if (dev) { - struct vnode *vp; - - if (vfinddev(dev, VCHR, &vp)) - VOP_REVOKE(vp, REVOKEALL); - - KASSERT(sc->sc_dev == 0); - } - /* prevent userland from getting to the device again */ SMR_LIST_REMOVE_LOCKED(sc, sc_entry); smr_barrier(); @@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_ struct tun_softc *sc; struct ifnet *ifp; int error; - u_short stayup = 0; - struct vnode *vp; char name[IFNAMSIZ]; unsigned int rdomain; - /* - * Find the vnode associated with this open before we sleep - * and let something else revoke it. Our caller has a reference - * to it so we don't need to account for it. - */ - if (!vfinddev(dev, VCHR, &vp)) - panic("%s vfinddev failed", __func__); - snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); - rdomain = rtable_l2(p->p_p->ps_rtableid); - /* let's find or make an interface to work with */ - while ((sc = tun_name_lookup(name)) == NULL) { - error = if_clone_create(name, rdomain); + rw_enter_write(&if_cloners_lock); + + if ((ifp = if_unit(name)) == NULL) { + rdomain = rtable_l2(p->p_p->ps_rtableid); + error = if_clone_create_locked(name, rdomain); switch (error) { - case 0: /* it's probably ours */ - stayup = TUN_STAYUP; - /* FALLTHROUGH */ - case EEXIST: /* we may have lost a race with someone else */ + case 0: + case EEXIST: break; default: - return (error); + goto out; } - } - - refcnt_take(&sc->sc_refs); - /* wait for it to be fully constructed before we use it */ - for (;;) { - if (ISSET(sc->sc_flags, TUN_DEAD)) { - error = ENXIO; - goto done; - } - - if (ISSET(sc->sc_flags, TUN_INITED)) - break; - - error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP); - if (error != 0) { - /* XXX if_clone_destroy if stayup? */ - goto done; - } + ifp = if_unit(name); + KASSERT(ifp != NULL); } - /* Has tun_clone_destroy torn the rug out under us? */ - if (vp->v_type == VBAD) { - error = ENXIO; - goto done; - } + sc = ifp->if_softc; - if (sc->sc_dev != 0) { - /* aww, we lost */ + if (sc->sc_dev) { error = EBUSY; - goto done; + goto out; } - /* it's ours now */ sc->sc_dev = dev; - CLR(sc->sc_flags, stayup); - /* automatically mark the interface running on open */ - ifp = &sc->sc_if; NET_LOCK(); SET(ifp->if_flags, IFF_UP | IFF_RUNNING); NET_UNLOCK(); tun_link_state(ifp, LINK_STATE_FULL_DUPLEX); - error = 0; -done: - tun_put(sc); +out: + rw_exit_write(&if_cloners_lock); + if_put(ifp); + return (error); } -/* - * tunclose - close the device; if closing the real device, flush pending - * output and unless STAYUP bring down and destroy the interface. - */ int tunclose(dev_t dev, int flag, int mode, struct proc *p) { - return (tun_dev_close(dev, p)); + return (tun_dev_close(dev, &tun_cloner, p)); } int tapclose(dev_t dev, int flag, int mode, struct proc *p) { - return (tun_dev_close(dev, p)); + return (tun_dev_close(dev, &tap_cloner, p)); } int -tun_dev_close(dev_t dev, struct proc *p) +tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p) { - struct tun_softc *sc; - struct ifnet *ifp; - int error = 0; - char name[IFNAMSIZ]; - int destroy = 0; - - sc = tun_get(dev); - if (sc == NULL) - return (ENXIO); - - ifp = &sc->sc_if; - - /* - * junk all pending output - */ - NET_LOCK(); - CLR(ifp->if_flags, IFF_UP | IFF_RUNNING); - CLR(ifp->if_capabilities, TUN_IF_CAPS); - NET_UNLOCK(); - ifq_purge(&ifp->if_snd); - - CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR); - sigio_free(&sc->sc_sigio); - - if (!ISSET(sc->sc_flags, TUN_DEAD)) { - /* we can't hold a reference to sc before we start a dtor */ - if (!ISSET(sc->sc_flags, TUN_STAYUP)) { - destroy = 1; - strlcpy(name, ifp->if_xname, sizeof(name)); - } else { - tun_link_state(ifp, LINK_STATE_DOWN); - } - } - - sc->sc_dev = 0; - - tun_put(sc); + char name[IFNAMSIZ]; - if (destroy) - if_clone_destroy(name); + snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); + if_clone_destroy(name); - return (error); + return (0); } /* @@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio, ifp = &sc->sc_if; - error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY), - (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev); - if (error != 0) - goto put; + while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) { + if (ISSET(ioflag, IO_NDELAY)) { + error = EWOULDBLOCK; + goto put; + } + + sc->sc_reading = 1; + error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH, + "tunread", INFSLP); + sc->sc_reading = 0; + + if (error != 0) + goto put; + if (ISSET(sc->sc_flags, TUN_DEAD)) { + error = ENXIO; + goto put; + } + } #if NBPFILTER > 0 if (ifp->if_bpf) Index: sys/net/if_tun.h =================================================================== RCS file: /cvs/src/sys/net/if_tun.h,v diff -u -p -r1.18 if_tun.h --- sys/net/if_tun.h 17 Nov 2024 00:25:07 -0000 1.18 +++ sys/net/if_tun.h 12 Feb 2025 15:35:04 -0000 @@ -75,7 +75,7 @@ struct tun_hdr { }; #define TUN_OPEN 0x0001 -#define TUN_INITED 0x0002 +#define TUN_INITED 0x0002 /* unused */ #define TUN_RCOLL 0x0004 /* unused */ #define TUN_IASET 0x0008 #define TUN_DSTADDR 0x0010 @@ -83,7 +83,7 @@ struct tun_hdr { #define TUN_ASYNC 0x0080 #define TUN_NBIO 0x0100 #define TUN_BRDADDR 0x0200 -#define TUN_STAYUP 0x0400 +#define TUN_STAYUP 0x0400 /* unused */ #define TUN_LAYER2 0x0800 #define TUN_READY (TUN_OPEN | TUN_INITED) Index: sys/net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v diff -u -p -r1.135 if_var.h --- sys/net/if_var.h 24 Jan 2025 09:19:07 -0000 1.135 +++ sys/net/if_var.h 12 Feb 2025 15:35:04 -0000 @@ -325,6 +325,7 @@ int niq_enlist(struct niqueue *, struct sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) extern struct ifnet_head ifnetlist; +extern struct rwlock if_cloners_lock; void if_start(struct ifnet *); int if_enqueue(struct ifnet *, struct mbuf *); @@ -355,6 +356,7 @@ int if_isconnected(const struct ifnet *, void if_clone_attach(struct if_clone *); +int if_clone_create_locked(const char *, int); int if_clone_create(const char *, int); int if_clone_destroy(const char *); Index: sys/net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v diff -u -p -r1.56 ifq.c --- sys/net/ifq.c 3 Feb 2025 08:58:52 -0000 1.56 +++ sys/net/ifq.c 12 Feb 2025 15:35:04 -0000 @@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq) } int -ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority, - const char *wmesg, volatile unsigned int *sleeping, - volatile unsigned int *alive) -{ - struct mbuf *m; - void *cookie; - int error = 0; - - ifq_deq_enter(ifq); - if (ifq->ifq_len == 0 && nbio) - error = EWOULDBLOCK; - else { - for (;;) { - m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie); - if (m != NULL) { - ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); - ifq->ifq_len--; - *mp = m; - break; - } - - (*sleeping)++; - error = msleep_nsec(ifq, &ifq->ifq_mtx, - priority, wmesg, INFSLP); - (*sleeping)--; - if (error != 0) - break; - if (!(*alive)) { - error = EIO; - break; - } - } - } - ifq_deq_leave(ifq); - - return (error); -} - -int ifq_hdatalen(struct ifqueue *ifq) { struct mbuf *m; Index: sys/net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v diff -u -p -r1.43 ifq.h --- sys/net/ifq.h 3 Feb 2025 08:58:52 -0000 1.43 +++ sys/net/ifq.h 12 Feb 2025 15:35:04 -0000 @@ -446,10 +446,6 @@ void ifq_barrier(struct ifqueue *); void ifq_set_oactive(struct ifqueue *); void ifq_deq_set_oactive(struct ifqueue *); -int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, - const char *, volatile unsigned int *, - volatile unsigned int *); - #define ifq_len(_ifq) READ_ONCE((_ifq)->ifq_len) #define ifq_empty(_ifq) (ifq_len(_ifq) == 0)