On Wed, Feb 12, 2025 at 06:47:36PM +0300, Vitaliy Makkoveev wrote: > Hi, > > This diff serializes tun(4)/tap(4) interfaces creation and destruction > from ifconfig(8) and device nodes path. Not only simplifies, but fixes > syzkaller [1] report. > > Can I ask openvpn or vmd(8) users to test it and prove that there is no > behavior changes?
Just to be clear the diff changes the way tun(4) and tap(4) interfaces are created and destroyed. So it matters on how the interface is created and if it does behave properly. I think the things to test are: - If you open /dev/tun1 is tun1 created and if you close /dev/tun1 is it destroyed again. - If tun1 alreay exists on open of /dev/tun1 then the last close of /dev/tun1 should not destroy tun1. - If you have /dev/tun1 open and destroy tun1 (the interface) are you notified that /dev/tun1 is no longer available. I think there are other cases to consider, those are just the obvious ones that I remember. I glanced at the diff, it changes a hell of a lot and the bit that scares me is that TUN_STAYUP becomes unused and looking at tun_dev_close() I think this is indeed not right. In other words I think the 2nd bit from the list above is violated. > 1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9 > > Index: sys/net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > diff -u -p -r1.726 if.c > --- sys/net/if.c 3 Feb 2025 08:58:52 -0000 1.726 > +++ sys/net/if.c 12 Feb 2025 15:35:04 -0000 > @@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0, > * Create a clone network interface. > */ > int > -if_clone_create(const char *name, int rdomain) > +if_clone_create_locked(const char *name, int rdomain) > { > struct if_clone *ifc; > struct ifnet *ifp; > int unit, ret; > > + rw_assert_wrlock(&if_cloners_lock); > + > ifc = if_clone_lookup(name, &unit); > if (ifc == NULL) > return (EINVAL); > - > - rw_enter_write(&if_cloners_lock); > - > if ((ifp = if_unit(name)) != NULL) { > ret = EEXIST; > - goto unlock; > + goto put; > } > > ret = (*ifc->ifc_create)(ifc, unit); > > if (ret != 0 || (ifp = if_unit(name)) == NULL) > - goto unlock; > + goto put; > > NET_LOCK(); > if_addgroup(ifp, ifc->ifc_name); > if (rdomain != 0) > if_setrdomain(ifp, rdomain); > NET_UNLOCK(); > -unlock: > - rw_exit_write(&if_cloners_lock); > +put: > if_put(ifp); > > return (ret); > +} > + > +int > +if_clone_create(const char *name, int rdomain) > +{ > + int error; > + > + rw_enter_write(&if_cloners_lock); > + error = if_clone_create_locked(name, rdomain); > + rw_exit_write(&if_cloners_lock); > + > + return (error); > } > > /* > Index: sys/net/if_tun.c > =================================================================== > RCS file: /cvs/src/sys/net/if_tun.c,v > diff -u -p -r1.250 if_tun.c > --- sys/net/if_tun.c 30 Dec 2024 02:46:00 -0000 1.250 > +++ sys/net/if_tun.c 12 Feb 2025 15:35:04 -0000 > @@ -115,7 +115,7 @@ int tundebug = TUN_DEBUG; > void tunattach(int); > > int tun_dev_open(dev_t, const struct if_clone *, int, struct proc *); > -int tun_dev_close(dev_t, struct proc *); > +int tun_dev_close(dev_t, const struct if_clone *, struct proc *); > int tun_dev_ioctl(dev_t, u_long, void *); > int tun_dev_read(dev_t, struct uio *, int); > int tun_dev_write(dev_t, struct uio *, int, int); > @@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni > > sigio_init(&sc->sc_sigio); > > - /* tell tun_dev_open we're initialised */ > - > - sc->sc_flags |= TUN_INITED|TUN_STAYUP; > - wakeup(sc); > - > return (0); > > exists: > @@ -303,25 +298,10 @@ int > tun_clone_destroy(struct ifnet *ifp) > { > struct tun_softc *sc = ifp->if_softc; > - dev_t dev; > > KERNEL_ASSERT_LOCKED(); > > - if (ISSET(sc->sc_flags, TUN_DEAD)) > - return (ENXIO); > SET(sc->sc_flags, TUN_DEAD); > - > - /* kick userland off the device */ > - dev = sc->sc_dev; > - if (dev) { > - struct vnode *vp; > - > - if (vfinddev(dev, VCHR, &vp)) > - VOP_REVOKE(vp, REVOKEALL); > - > - KASSERT(sc->sc_dev == 0); > - } > - > /* prevent userland from getting to the device again */ > SMR_LIST_REMOVE_LOCKED(sc, sc_entry); > smr_barrier(); > @@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_ > struct tun_softc *sc; > struct ifnet *ifp; > int error; > - u_short stayup = 0; > - struct vnode *vp; > > char name[IFNAMSIZ]; > unsigned int rdomain; > > - /* > - * Find the vnode associated with this open before we sleep > - * and let something else revoke it. Our caller has a reference > - * to it so we don't need to account for it. > - */ > - if (!vfinddev(dev, VCHR, &vp)) > - panic("%s vfinddev failed", __func__); > - > snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); > - rdomain = rtable_l2(p->p_p->ps_rtableid); > > - /* let's find or make an interface to work with */ > - while ((sc = tun_name_lookup(name)) == NULL) { > - error = if_clone_create(name, rdomain); > + rw_enter_write(&if_cloners_lock); > + > + if ((ifp = if_unit(name)) == NULL) { > + rdomain = rtable_l2(p->p_p->ps_rtableid); > + error = if_clone_create_locked(name, rdomain); > switch (error) { > - case 0: /* it's probably ours */ > - stayup = TUN_STAYUP; > - /* FALLTHROUGH */ > - case EEXIST: /* we may have lost a race with someone else */ > + case 0: > + case EEXIST: > break; > default: > - return (error); > + goto out; > } > - } > - > - refcnt_take(&sc->sc_refs); > > - /* wait for it to be fully constructed before we use it */ > - for (;;) { > - if (ISSET(sc->sc_flags, TUN_DEAD)) { > - error = ENXIO; > - goto done; > - } > - > - if (ISSET(sc->sc_flags, TUN_INITED)) > - break; > - > - error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP); > - if (error != 0) { > - /* XXX if_clone_destroy if stayup? */ > - goto done; > - } > + ifp = if_unit(name); > + KASSERT(ifp != NULL); > } > > - /* Has tun_clone_destroy torn the rug out under us? */ > - if (vp->v_type == VBAD) { > - error = ENXIO; > - goto done; > - } > + sc = ifp->if_softc; > > - if (sc->sc_dev != 0) { > - /* aww, we lost */ > + if (sc->sc_dev) { > error = EBUSY; > - goto done; > + goto out; > } > - /* it's ours now */ > sc->sc_dev = dev; > - CLR(sc->sc_flags, stayup); > > - /* automatically mark the interface running on open */ > - ifp = &sc->sc_if; > NET_LOCK(); > SET(ifp->if_flags, IFF_UP | IFF_RUNNING); > NET_UNLOCK(); > tun_link_state(ifp, LINK_STATE_FULL_DUPLEX); > - error = 0; > > -done: > - tun_put(sc); > +out: > + rw_exit_write(&if_cloners_lock); > + if_put(ifp); > + > return (error); > } > > -/* > - * tunclose - close the device; if closing the real device, flush pending > - * output and unless STAYUP bring down and destroy the interface. > - */ > int > tunclose(dev_t dev, int flag, int mode, struct proc *p) > { > - return (tun_dev_close(dev, p)); > + return (tun_dev_close(dev, &tun_cloner, p)); > } > > int > tapclose(dev_t dev, int flag, int mode, struct proc *p) > { > - return (tun_dev_close(dev, p)); > + return (tun_dev_close(dev, &tap_cloner, p)); > } > > int > -tun_dev_close(dev_t dev, struct proc *p) > +tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p) > { > - struct tun_softc *sc; > - struct ifnet *ifp; > - int error = 0; > - char name[IFNAMSIZ]; > - int destroy = 0; > - > - sc = tun_get(dev); > - if (sc == NULL) > - return (ENXIO); > - > - ifp = &sc->sc_if; > - > - /* > - * junk all pending output > - */ > - NET_LOCK(); > - CLR(ifp->if_flags, IFF_UP | IFF_RUNNING); > - CLR(ifp->if_capabilities, TUN_IF_CAPS); > - NET_UNLOCK(); > - ifq_purge(&ifp->if_snd); > - > - CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR); > - sigio_free(&sc->sc_sigio); > - > - if (!ISSET(sc->sc_flags, TUN_DEAD)) { > - /* we can't hold a reference to sc before we start a dtor */ > - if (!ISSET(sc->sc_flags, TUN_STAYUP)) { > - destroy = 1; > - strlcpy(name, ifp->if_xname, sizeof(name)); > - } else { > - tun_link_state(ifp, LINK_STATE_DOWN); > - } > - } > - > - sc->sc_dev = 0; > - > - tun_put(sc); > + char name[IFNAMSIZ]; > > - if (destroy) > - if_clone_destroy(name); > + snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); > + if_clone_destroy(name); > > - return (error); > + return (0); > } > > /* > @@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio, > > ifp = &sc->sc_if; > > - error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY), > - (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev); > - if (error != 0) > - goto put; > + while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) { > + if (ISSET(ioflag, IO_NDELAY)) { > + error = EWOULDBLOCK; > + goto put; > + } > + > + sc->sc_reading = 1; > + error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH, > + "tunread", INFSLP); > + sc->sc_reading = 0; > + > + if (error != 0) > + goto put; > + if (ISSET(sc->sc_flags, TUN_DEAD)) { > + error = ENXIO; > + goto put; > + } > + } > > #if NBPFILTER > 0 > if (ifp->if_bpf) > Index: sys/net/if_tun.h > =================================================================== > RCS file: /cvs/src/sys/net/if_tun.h,v > diff -u -p -r1.18 if_tun.h > --- sys/net/if_tun.h 17 Nov 2024 00:25:07 -0000 1.18 > +++ sys/net/if_tun.h 12 Feb 2025 15:35:04 -0000 > @@ -75,7 +75,7 @@ struct tun_hdr { > }; > > #define TUN_OPEN 0x0001 > -#define TUN_INITED 0x0002 > +#define TUN_INITED 0x0002 /* unused */ > #define TUN_RCOLL 0x0004 /* unused */ > #define TUN_IASET 0x0008 > #define TUN_DSTADDR 0x0010 > @@ -83,7 +83,7 @@ struct tun_hdr { > #define TUN_ASYNC 0x0080 > #define TUN_NBIO 0x0100 > #define TUN_BRDADDR 0x0200 > -#define TUN_STAYUP 0x0400 > +#define TUN_STAYUP 0x0400 /* unused */ > #define TUN_LAYER2 0x0800 > > #define TUN_READY (TUN_OPEN | TUN_INITED) > Index: sys/net/if_var.h > =================================================================== > RCS file: /cvs/src/sys/net/if_var.h,v > diff -u -p -r1.135 if_var.h > --- sys/net/if_var.h 24 Jan 2025 09:19:07 -0000 1.135 > +++ sys/net/if_var.h 12 Feb 2025 15:35:04 -0000 > @@ -325,6 +325,7 @@ int niq_enlist(struct niqueue *, struct > sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) > > extern struct ifnet_head ifnetlist; > +extern struct rwlock if_cloners_lock; > > void if_start(struct ifnet *); > int if_enqueue(struct ifnet *, struct mbuf *); > @@ -355,6 +356,7 @@ int if_isconnected(const struct ifnet *, > > void if_clone_attach(struct if_clone *); > > +int if_clone_create_locked(const char *, int); > int if_clone_create(const char *, int); > int if_clone_destroy(const char *); > > Index: sys/net/ifq.c > =================================================================== > RCS file: /cvs/src/sys/net/ifq.c,v > diff -u -p -r1.56 ifq.c > --- sys/net/ifq.c 3 Feb 2025 08:58:52 -0000 1.56 > +++ sys/net/ifq.c 12 Feb 2025 15:35:04 -0000 > @@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq) > } > > int > -ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority, > - const char *wmesg, volatile unsigned int *sleeping, > - volatile unsigned int *alive) > -{ > - struct mbuf *m; > - void *cookie; > - int error = 0; > - > - ifq_deq_enter(ifq); > - if (ifq->ifq_len == 0 && nbio) > - error = EWOULDBLOCK; > - else { > - for (;;) { > - m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie); > - if (m != NULL) { > - ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); > - ifq->ifq_len--; > - *mp = m; > - break; > - } > - > - (*sleeping)++; > - error = msleep_nsec(ifq, &ifq->ifq_mtx, > - priority, wmesg, INFSLP); > - (*sleeping)--; > - if (error != 0) > - break; > - if (!(*alive)) { > - error = EIO; > - break; > - } > - } > - } > - ifq_deq_leave(ifq); > - > - return (error); > -} > - > -int > ifq_hdatalen(struct ifqueue *ifq) > { > struct mbuf *m; > Index: sys/net/ifq.h > =================================================================== > RCS file: /cvs/src/sys/net/ifq.h,v > diff -u -p -r1.43 ifq.h > --- sys/net/ifq.h 3 Feb 2025 08:58:52 -0000 1.43 > +++ sys/net/ifq.h 12 Feb 2025 15:35:04 -0000 > @@ -446,10 +446,6 @@ void ifq_barrier(struct ifqueue *); > void ifq_set_oactive(struct ifqueue *); > void ifq_deq_set_oactive(struct ifqueue *); > > -int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, > - const char *, volatile unsigned int *, > - volatile unsigned int *); > - > #define ifq_len(_ifq) READ_ONCE((_ifq)->ifq_len) > #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) > > -- :wq Claudio