> On 13 Feb 2025, at 19:33, Claudio Jeker <cje...@diehard.n-r-g.com> wrote: > > On Wed, Feb 12, 2025 at 06:47:36PM +0300, Vitaliy Makkoveev wrote: >> Hi, >> >> This diff serializes tun(4)/tap(4) interfaces creation and destruction >> from ifconfig(8) and device nodes path. Not only simplifies, but fixes >> syzkaller [1] report. >> >> Can I ask openvpn or vmd(8) users to test it and prove that there is no >> behavior changes? > > Just to be clear the diff changes the way tun(4) and tap(4) interfaces are > created and destroyed. So it matters on how the interface is created and > if it does behave properly.
The original creation and destruction paths of tun(4) are full of sleep points, so those paths have dances to protect half-created or half-destroyed interface. This diff uses the dedicated rwlock for cloners in the device nodes handlers to make races impossible. > I think the things to test are: > - If you open /dev/tun1 is tun1 created and if you close /dev/tun1 is > it destroyed again. It works. > - If tun1 alreay exists on open of /dev/tun1 then the last close of > /dev/tun1 should not destroy tun1. This is not the current behaviour. The TUN_STAYUP exists to prevent the concurrent destruction from tun_dev_close() while we are in the middle of tun_dev_open(). This could happen from the VOP_REWOKE() path of tun_clone_destroy(). In this case the interface will be destroyed, but not by tun_dev_close(). > - If you have /dev/tun1 open and destroy tun1 (the interface) are you > notified that /dev/tun1 is no longer available. > Only tun_dev_read() should be notified, and yes, it works. > I think there are other cases to consider, those are just the obvious ones > that I remember. I glanced at the diff, it changes a hell of a lot and the > bit that scares me is that TUN_STAYUP becomes unused and looking at > tun_dev_close() I think this is indeed not right. In other words I think > the 2nd bit from the list above is violated. As I previously said, the TUN_STAYUP bit required only to prevent concurrent destruction in the middle of tun_dev_open(). Now concurrent destruction is impossible. > >> 1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9 >> >> Index: sys/net/if.c >> =================================================================== >> RCS file: /cvs/src/sys/net/if.c,v >> diff -u -p -r1.726 if.c >> --- sys/net/if.c 3 Feb 2025 08:58:52 -0000 1.726 >> +++ sys/net/if.c 12 Feb 2025 15:35:04 -0000 >> @@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0, >> * Create a clone network interface. >> */ >> int >> -if_clone_create(const char *name, int rdomain) >> +if_clone_create_locked(const char *name, int rdomain) >> { >> struct if_clone *ifc; >> struct ifnet *ifp; >> int unit, ret; >> >> + rw_assert_wrlock(&if_cloners_lock); >> + >> ifc = if_clone_lookup(name, &unit); >> if (ifc == NULL) >> return (EINVAL); >> - >> - rw_enter_write(&if_cloners_lock); >> - >> if ((ifp = if_unit(name)) != NULL) { >> ret = EEXIST; >> - goto unlock; >> + goto put; >> } >> >> ret = (*ifc->ifc_create)(ifc, unit); >> >> if (ret != 0 || (ifp = if_unit(name)) == NULL) >> - goto unlock; >> + goto put; >> >> NET_LOCK(); >> if_addgroup(ifp, ifc->ifc_name); >> if (rdomain != 0) >> if_setrdomain(ifp, rdomain); >> NET_UNLOCK(); >> -unlock: >> - rw_exit_write(&if_cloners_lock); >> +put: >> if_put(ifp); >> >> return (ret); >> +} >> + >> +int >> +if_clone_create(const char *name, int rdomain) >> +{ >> + int error; >> + >> + rw_enter_write(&if_cloners_lock); >> + error = if_clone_create_locked(name, rdomain); >> + rw_exit_write(&if_cloners_lock); >> + >> + return (error); >> } >> >> /* >> Index: sys/net/if_tun.c >> =================================================================== >> RCS file: /cvs/src/sys/net/if_tun.c,v >> diff -u -p -r1.250 if_tun.c >> --- sys/net/if_tun.c 30 Dec 2024 02:46:00 -0000 1.250 >> +++ sys/net/if_tun.c 12 Feb 2025 15:35:04 -0000 >> @@ -115,7 +115,7 @@ int tundebug = TUN_DEBUG; >> void tunattach(int); >> >> int tun_dev_open(dev_t, const struct if_clone *, int, struct proc *); >> -int tun_dev_close(dev_t, struct proc *); >> +int tun_dev_close(dev_t, const struct if_clone *, struct proc *); >> int tun_dev_ioctl(dev_t, u_long, void *); >> int tun_dev_read(dev_t, struct uio *, int); >> int tun_dev_write(dev_t, struct uio *, int, int); >> @@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni >> >> sigio_init(&sc->sc_sigio); >> >> - /* tell tun_dev_open we're initialised */ >> - >> - sc->sc_flags |= TUN_INITED|TUN_STAYUP; >> - wakeup(sc); >> - >> return (0); >> >> exists: >> @@ -303,25 +298,10 @@ int >> tun_clone_destroy(struct ifnet *ifp) >> { >> struct tun_softc *sc = ifp->if_softc; >> - dev_t dev; >> >> KERNEL_ASSERT_LOCKED(); >> >> - if (ISSET(sc->sc_flags, TUN_DEAD)) >> - return (ENXIO); >> SET(sc->sc_flags, TUN_DEAD); >> - >> - /* kick userland off the device */ >> - dev = sc->sc_dev; >> - if (dev) { >> - struct vnode *vp; >> - >> - if (vfinddev(dev, VCHR, &vp)) >> - VOP_REVOKE(vp, REVOKEALL); >> - >> - KASSERT(sc->sc_dev == 0); >> - } >> - >> /* prevent userland from getting to the device again */ >> SMR_LIST_REMOVE_LOCKED(sc, sc_entry); >> smr_barrier(); >> @@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_ >> struct tun_softc *sc; >> struct ifnet *ifp; >> int error; >> - u_short stayup = 0; >> - struct vnode *vp; >> >> char name[IFNAMSIZ]; >> unsigned int rdomain; >> >> - /* >> - * Find the vnode associated with this open before we sleep >> - * and let something else revoke it. Our caller has a reference >> - * to it so we don't need to account for it. >> - */ >> - if (!vfinddev(dev, VCHR, &vp)) >> - panic("%s vfinddev failed", __func__); >> - >> snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); >> - rdomain = rtable_l2(p->p_p->ps_rtableid); >> >> - /* let's find or make an interface to work with */ >> - while ((sc = tun_name_lookup(name)) == NULL) { >> - error = if_clone_create(name, rdomain); >> + rw_enter_write(&if_cloners_lock); >> + >> + if ((ifp = if_unit(name)) == NULL) { >> + rdomain = rtable_l2(p->p_p->ps_rtableid); >> + error = if_clone_create_locked(name, rdomain); >> switch (error) { >> - case 0: /* it's probably ours */ >> - stayup = TUN_STAYUP; >> - /* FALLTHROUGH */ >> - case EEXIST: /* we may have lost a race with someone else */ >> + case 0: >> + case EEXIST: >> break; >> default: >> - return (error); >> + goto out; >> } >> - } >> - >> - refcnt_take(&sc->sc_refs); >> >> - /* wait for it to be fully constructed before we use it */ >> - for (;;) { >> - if (ISSET(sc->sc_flags, TUN_DEAD)) { >> - error = ENXIO; >> - goto done; >> - } >> - >> - if (ISSET(sc->sc_flags, TUN_INITED)) >> - break; >> - >> - error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP); >> - if (error != 0) { >> - /* XXX if_clone_destroy if stayup? */ >> - goto done; >> - } >> + ifp = if_unit(name); >> + KASSERT(ifp != NULL); >> } >> >> - /* Has tun_clone_destroy torn the rug out under us? */ >> - if (vp->v_type == VBAD) { >> - error = ENXIO; >> - goto done; >> - } >> + sc = ifp->if_softc; >> >> - if (sc->sc_dev != 0) { >> - /* aww, we lost */ >> + if (sc->sc_dev) { >> error = EBUSY; >> - goto done; >> + goto out; >> } >> - /* it's ours now */ >> sc->sc_dev = dev; >> - CLR(sc->sc_flags, stayup); >> >> - /* automatically mark the interface running on open */ >> - ifp = &sc->sc_if; >> NET_LOCK(); >> SET(ifp->if_flags, IFF_UP | IFF_RUNNING); >> NET_UNLOCK(); >> tun_link_state(ifp, LINK_STATE_FULL_DUPLEX); >> - error = 0; >> >> -done: >> - tun_put(sc); >> +out: >> + rw_exit_write(&if_cloners_lock); >> + if_put(ifp); >> + >> return (error); >> } >> >> -/* >> - * tunclose - close the device; if closing the real device, flush pending >> - * output and unless STAYUP bring down and destroy the interface. >> - */ >> int >> tunclose(dev_t dev, int flag, int mode, struct proc *p) >> { >> - return (tun_dev_close(dev, p)); >> + return (tun_dev_close(dev, &tun_cloner, p)); >> } >> >> int >> tapclose(dev_t dev, int flag, int mode, struct proc *p) >> { >> - return (tun_dev_close(dev, p)); >> + return (tun_dev_close(dev, &tap_cloner, p)); >> } >> >> int >> -tun_dev_close(dev_t dev, struct proc *p) >> +tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p) >> { >> - struct tun_softc *sc; >> - struct ifnet *ifp; >> - int error = 0; >> - char name[IFNAMSIZ]; >> - int destroy = 0; >> - >> - sc = tun_get(dev); >> - if (sc == NULL) >> - return (ENXIO); >> - >> - ifp = &sc->sc_if; >> - >> - /* >> - * junk all pending output >> - */ >> - NET_LOCK(); >> - CLR(ifp->if_flags, IFF_UP | IFF_RUNNING); >> - CLR(ifp->if_capabilities, TUN_IF_CAPS); >> - NET_UNLOCK(); >> - ifq_purge(&ifp->if_snd); >> - >> - CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR); >> - sigio_free(&sc->sc_sigio); >> - >> - if (!ISSET(sc->sc_flags, TUN_DEAD)) { >> - /* we can't hold a reference to sc before we start a dtor */ >> - if (!ISSET(sc->sc_flags, TUN_STAYUP)) { >> - destroy = 1; >> - strlcpy(name, ifp->if_xname, sizeof(name)); >> - } else { >> - tun_link_state(ifp, LINK_STATE_DOWN); >> - } >> - } >> - >> - sc->sc_dev = 0; >> - >> - tun_put(sc); >> + char name[IFNAMSIZ]; >> >> - if (destroy) >> - if_clone_destroy(name); >> + snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); >> + if_clone_destroy(name); >> >> - return (error); >> + return (0); >> } >> >> /* >> @@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio, >> >> ifp = &sc->sc_if; >> >> - error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY), >> - (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev); >> - if (error != 0) >> - goto put; >> + while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) { >> + if (ISSET(ioflag, IO_NDELAY)) { >> + error = EWOULDBLOCK; >> + goto put; >> + } >> + >> + sc->sc_reading = 1; >> + error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH, >> + "tunread", INFSLP); >> + sc->sc_reading = 0; >> + >> + if (error != 0) >> + goto put; >> + if (ISSET(sc->sc_flags, TUN_DEAD)) { >> + error = ENXIO; >> + goto put; >> + } >> + } >> >> #if NBPFILTER > 0 >> if (ifp->if_bpf) >> Index: sys/net/if_tun.h >> =================================================================== >> RCS file: /cvs/src/sys/net/if_tun.h,v >> diff -u -p -r1.18 if_tun.h >> --- sys/net/if_tun.h 17 Nov 2024 00:25:07 -0000 1.18 >> +++ sys/net/if_tun.h 12 Feb 2025 15:35:04 -0000 >> @@ -75,7 +75,7 @@ struct tun_hdr { >> }; >> >> #define TUN_OPEN 0x0001 >> -#define TUN_INITED 0x0002 >> +#define TUN_INITED 0x0002 /* unused */ >> #define TUN_RCOLL 0x0004 /* unused */ >> #define TUN_IASET 0x0008 >> #define TUN_DSTADDR 0x0010 >> @@ -83,7 +83,7 @@ struct tun_hdr { >> #define TUN_ASYNC 0x0080 >> #define TUN_NBIO 0x0100 >> #define TUN_BRDADDR 0x0200 >> -#define TUN_STAYUP 0x0400 >> +#define TUN_STAYUP 0x0400 /* unused */ >> #define TUN_LAYER2 0x0800 >> >> #define TUN_READY (TUN_OPEN | TUN_INITED) >> Index: sys/net/if_var.h >> =================================================================== >> RCS file: /cvs/src/sys/net/if_var.h,v >> diff -u -p -r1.135 if_var.h >> --- sys/net/if_var.h 24 Jan 2025 09:19:07 -0000 1.135 >> +++ sys/net/if_var.h 12 Feb 2025 15:35:04 -0000 >> @@ -325,6 +325,7 @@ int niq_enlist(struct niqueue *, struct >> sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) >> >> extern struct ifnet_head ifnetlist; >> +extern struct rwlock if_cloners_lock; >> >> void if_start(struct ifnet *); >> int if_enqueue(struct ifnet *, struct mbuf *); >> @@ -355,6 +356,7 @@ int if_isconnected(const struct ifnet *, >> >> void if_clone_attach(struct if_clone *); >> >> +int if_clone_create_locked(const char *, int); >> int if_clone_create(const char *, int); >> int if_clone_destroy(const char *); >> >> Index: sys/net/ifq.c >> =================================================================== >> RCS file: /cvs/src/sys/net/ifq.c,v >> diff -u -p -r1.56 ifq.c >> --- sys/net/ifq.c 3 Feb 2025 08:58:52 -0000 1.56 >> +++ sys/net/ifq.c 12 Feb 2025 15:35:04 -0000 >> @@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq) >> } >> >> int >> -ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority, >> - const char *wmesg, volatile unsigned int *sleeping, >> - volatile unsigned int *alive) >> -{ >> - struct mbuf *m; >> - void *cookie; >> - int error = 0; >> - >> - ifq_deq_enter(ifq); >> - if (ifq->ifq_len == 0 && nbio) >> - error = EWOULDBLOCK; >> - else { >> - for (;;) { >> - m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie); >> - if (m != NULL) { >> - ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); >> - ifq->ifq_len--; >> - *mp = m; >> - break; >> - } >> - >> - (*sleeping)++; >> - error = msleep_nsec(ifq, &ifq->ifq_mtx, >> - priority, wmesg, INFSLP); >> - (*sleeping)--; >> - if (error != 0) >> - break; >> - if (!(*alive)) { >> - error = EIO; >> - break; >> - } >> - } >> - } >> - ifq_deq_leave(ifq); >> - >> - return (error); >> -} >> - >> -int >> ifq_hdatalen(struct ifqueue *ifq) >> { >> struct mbuf *m; >> Index: sys/net/ifq.h >> =================================================================== >> RCS file: /cvs/src/sys/net/ifq.h,v >> diff -u -p -r1.43 ifq.h >> --- sys/net/ifq.h 3 Feb 2025 08:58:52 -0000 1.43 >> +++ sys/net/ifq.h 12 Feb 2025 15:35:04 -0000 >> @@ -446,10 +446,6 @@ void ifq_barrier(struct ifqueue *); >> void ifq_set_oactive(struct ifqueue *); >> void ifq_deq_set_oactive(struct ifqueue *); >> >> -int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, >> - const char *, volatile unsigned int *, >> - volatile unsigned int *); >> - >> #define ifq_len(_ifq) READ_ONCE((_ifq)->ifq_len) >> #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) >> >> > > -- > :wq Claudio