On Wed, Feb 12, 2025 at 06:47:36PM +0300, Vitaliy Makkoveev wrote: > Hi, > > This diff serializes tun(4)/tap(4) interfaces creation and destruction > from ifconfig(8) and device nodes path. Not only simplifies, but fixes > syzkaller [1] report. > > Can I ask openvpn or vmd(8) users to test it and prove that there is no > behavior changes? >
Testing on amd64 - doesn't seem to cause any change in behavior for vmd(8) guests using tap(4) with veb(4)/vport(4). > 1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9 > > Index: sys/net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > diff -u -p -r1.726 if.c > --- sys/net/if.c 3 Feb 2025 08:58:52 -0000 1.726 > +++ sys/net/if.c 12 Feb 2025 15:35:04 -0000 > @@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0, > * Create a clone network interface. > */ > int > -if_clone_create(const char *name, int rdomain) > +if_clone_create_locked(const char *name, int rdomain) > { > struct if_clone *ifc; > struct ifnet *ifp; > int unit, ret; > > + rw_assert_wrlock(&if_cloners_lock); > + > ifc = if_clone_lookup(name, &unit); > if (ifc == NULL) > return (EINVAL); > - > - rw_enter_write(&if_cloners_lock); > - > if ((ifp = if_unit(name)) != NULL) { > ret = EEXIST; > - goto unlock; > + goto put; > } > > ret = (*ifc->ifc_create)(ifc, unit); > > if (ret != 0 || (ifp = if_unit(name)) == NULL) > - goto unlock; > + goto put; > > NET_LOCK(); > if_addgroup(ifp, ifc->ifc_name); > if (rdomain != 0) > if_setrdomain(ifp, rdomain); > NET_UNLOCK(); > -unlock: > - rw_exit_write(&if_cloners_lock); > +put: > if_put(ifp); > > return (ret); > +} > + > +int > +if_clone_create(const char *name, int rdomain) > +{ > + int error; > + > + rw_enter_write(&if_cloners_lock); > + error = if_clone_create_locked(name, rdomain); > + rw_exit_write(&if_cloners_lock); > + > + return (error); > } > > /* > Index: sys/net/if_tun.c > =================================================================== > RCS file: /cvs/src/sys/net/if_tun.c,v > diff -u -p -r1.250 if_tun.c > --- sys/net/if_tun.c 30 Dec 2024 02:46:00 -0000 1.250 > +++ sys/net/if_tun.c 12 Feb 2025 15:35:04 -0000 > @@ -115,7 +115,7 @@ int tundebug = TUN_DEBUG; > void tunattach(int); > > int tun_dev_open(dev_t, const struct if_clone *, int, struct proc *); > -int tun_dev_close(dev_t, struct proc *); > +int tun_dev_close(dev_t, const struct if_clone *, struct proc *); > int tun_dev_ioctl(dev_t, u_long, void *); > int tun_dev_read(dev_t, struct uio *, int); > int tun_dev_write(dev_t, struct uio *, int, int); > @@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni > > sigio_init(&sc->sc_sigio); > > - /* tell tun_dev_open we're initialised */ > - > - sc->sc_flags |= TUN_INITED|TUN_STAYUP; > - wakeup(sc); > - > return (0); > > exists: > @@ -303,25 +298,10 @@ int > tun_clone_destroy(struct ifnet *ifp) > { > struct tun_softc *sc = ifp->if_softc; > - dev_t dev; > > KERNEL_ASSERT_LOCKED(); > > - if (ISSET(sc->sc_flags, TUN_DEAD)) > - return (ENXIO); > SET(sc->sc_flags, TUN_DEAD); > - > - /* kick userland off the device */ > - dev = sc->sc_dev; > - if (dev) { > - struct vnode *vp; > - > - if (vfinddev(dev, VCHR, &vp)) > - VOP_REVOKE(vp, REVOKEALL); > - > - KASSERT(sc->sc_dev == 0); > - } > - > /* prevent userland from getting to the device again */ > SMR_LIST_REMOVE_LOCKED(sc, sc_entry); > smr_barrier(); > @@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_ > struct tun_softc *sc; > struct ifnet *ifp; > int error; > - u_short stayup = 0; > - struct vnode *vp; > > char name[IFNAMSIZ]; > unsigned int rdomain; > > - /* > - * Find the vnode associated with this open before we sleep > - * and let something else revoke it. Our caller has a reference > - * to it so we don't need to account for it. > - */ > - if (!vfinddev(dev, VCHR, &vp)) > - panic("%s vfinddev failed", __func__); > - > snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); > - rdomain = rtable_l2(p->p_p->ps_rtableid); > > - /* let's find or make an interface to work with */ > - while ((sc = tun_name_lookup(name)) == NULL) { > - error = if_clone_create(name, rdomain); > + rw_enter_write(&if_cloners_lock); > + > + if ((ifp = if_unit(name)) == NULL) { > + rdomain = rtable_l2(p->p_p->ps_rtableid); > + error = if_clone_create_locked(name, rdomain); > switch (error) { > - case 0: /* it's probably ours */ > - stayup = TUN_STAYUP; > - /* FALLTHROUGH */ > - case EEXIST: /* we may have lost a race with someone else */ > + case 0: > + case EEXIST: > break; > default: > - return (error); > + goto out; > } > - } > - > - refcnt_take(&sc->sc_refs); > > - /* wait for it to be fully constructed before we use it */ > - for (;;) { > - if (ISSET(sc->sc_flags, TUN_DEAD)) { > - error = ENXIO; > - goto done; > - } > - > - if (ISSET(sc->sc_flags, TUN_INITED)) > - break; > - > - error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP); > - if (error != 0) { > - /* XXX if_clone_destroy if stayup? */ > - goto done; > - } > + ifp = if_unit(name); > + KASSERT(ifp != NULL); > } > > - /* Has tun_clone_destroy torn the rug out under us? */ > - if (vp->v_type == VBAD) { > - error = ENXIO; > - goto done; > - } > + sc = ifp->if_softc; > > - if (sc->sc_dev != 0) { > - /* aww, we lost */ > + if (sc->sc_dev) { > error = EBUSY; > - goto done; > + goto out; > } > - /* it's ours now */ > sc->sc_dev = dev; > - CLR(sc->sc_flags, stayup); > > - /* automatically mark the interface running on open */ > - ifp = &sc->sc_if; > NET_LOCK(); > SET(ifp->if_flags, IFF_UP | IFF_RUNNING); > NET_UNLOCK(); > tun_link_state(ifp, LINK_STATE_FULL_DUPLEX); > - error = 0; > > -done: > - tun_put(sc); > +out: > + rw_exit_write(&if_cloners_lock); > + if_put(ifp); > + > return (error); > } > > -/* > - * tunclose - close the device; if closing the real device, flush pending > - * output and unless STAYUP bring down and destroy the interface. > - */ > int > tunclose(dev_t dev, int flag, int mode, struct proc *p) > { > - return (tun_dev_close(dev, p)); > + return (tun_dev_close(dev, &tun_cloner, p)); > } > > int > tapclose(dev_t dev, int flag, int mode, struct proc *p) > { > - return (tun_dev_close(dev, p)); > + return (tun_dev_close(dev, &tap_cloner, p)); > } > > int > -tun_dev_close(dev_t dev, struct proc *p) > +tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p) > { > - struct tun_softc *sc; > - struct ifnet *ifp; > - int error = 0; > - char name[IFNAMSIZ]; > - int destroy = 0; > - > - sc = tun_get(dev); > - if (sc == NULL) > - return (ENXIO); > - > - ifp = &sc->sc_if; > - > - /* > - * junk all pending output > - */ > - NET_LOCK(); > - CLR(ifp->if_flags, IFF_UP | IFF_RUNNING); > - CLR(ifp->if_capabilities, TUN_IF_CAPS); > - NET_UNLOCK(); > - ifq_purge(&ifp->if_snd); > - > - CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR); > - sigio_free(&sc->sc_sigio); > - > - if (!ISSET(sc->sc_flags, TUN_DEAD)) { > - /* we can't hold a reference to sc before we start a dtor */ > - if (!ISSET(sc->sc_flags, TUN_STAYUP)) { > - destroy = 1; > - strlcpy(name, ifp->if_xname, sizeof(name)); > - } else { > - tun_link_state(ifp, LINK_STATE_DOWN); > - } > - } > - > - sc->sc_dev = 0; > - > - tun_put(sc); > + char name[IFNAMSIZ]; > > - if (destroy) > - if_clone_destroy(name); > + snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); > + if_clone_destroy(name); > > - return (error); > + return (0); > } > > /* > @@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio, > > ifp = &sc->sc_if; > > - error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY), > - (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev); > - if (error != 0) > - goto put; > + while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) { > + if (ISSET(ioflag, IO_NDELAY)) { > + error = EWOULDBLOCK; > + goto put; > + } > + > + sc->sc_reading = 1; > + error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH, > + "tunread", INFSLP); > + sc->sc_reading = 0; > + > + if (error != 0) > + goto put; > + if (ISSET(sc->sc_flags, TUN_DEAD)) { > + error = ENXIO; > + goto put; > + } > + } > > #if NBPFILTER > 0 > if (ifp->if_bpf) > Index: sys/net/if_tun.h > =================================================================== > RCS file: /cvs/src/sys/net/if_tun.h,v > diff -u -p -r1.18 if_tun.h > --- sys/net/if_tun.h 17 Nov 2024 00:25:07 -0000 1.18 > +++ sys/net/if_tun.h 12 Feb 2025 15:35:04 -0000 > @@ -75,7 +75,7 @@ struct tun_hdr { > }; > > #define TUN_OPEN 0x0001 > -#define TUN_INITED 0x0002 > +#define TUN_INITED 0x0002 /* unused */ > #define TUN_RCOLL 0x0004 /* unused */ > #define TUN_IASET 0x0008 > #define TUN_DSTADDR 0x0010 > @@ -83,7 +83,7 @@ struct tun_hdr { > #define TUN_ASYNC 0x0080 > #define TUN_NBIO 0x0100 > #define TUN_BRDADDR 0x0200 > -#define TUN_STAYUP 0x0400 > +#define TUN_STAYUP 0x0400 /* unused */ > #define TUN_LAYER2 0x0800 > > #define TUN_READY (TUN_OPEN | TUN_INITED) > Index: sys/net/if_var.h > =================================================================== > RCS file: /cvs/src/sys/net/if_var.h,v > diff -u -p -r1.135 if_var.h > --- sys/net/if_var.h 24 Jan 2025 09:19:07 -0000 1.135 > +++ sys/net/if_var.h 12 Feb 2025 15:35:04 -0000 > @@ -325,6 +325,7 @@ int niq_enlist(struct niqueue *, struct > sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q) > > extern struct ifnet_head ifnetlist; > +extern struct rwlock if_cloners_lock; > > void if_start(struct ifnet *); > int if_enqueue(struct ifnet *, struct mbuf *); > @@ -355,6 +356,7 @@ int if_isconnected(const struct ifnet *, > > void if_clone_attach(struct if_clone *); > > +int if_clone_create_locked(const char *, int); > int if_clone_create(const char *, int); > int if_clone_destroy(const char *); > > Index: sys/net/ifq.c > =================================================================== > RCS file: /cvs/src/sys/net/ifq.c,v > diff -u -p -r1.56 ifq.c > --- sys/net/ifq.c 3 Feb 2025 08:58:52 -0000 1.56 > +++ sys/net/ifq.c 12 Feb 2025 15:35:04 -0000 > @@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq) > } > > int > -ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority, > - const char *wmesg, volatile unsigned int *sleeping, > - volatile unsigned int *alive) > -{ > - struct mbuf *m; > - void *cookie; > - int error = 0; > - > - ifq_deq_enter(ifq); > - if (ifq->ifq_len == 0 && nbio) > - error = EWOULDBLOCK; > - else { > - for (;;) { > - m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie); > - if (m != NULL) { > - ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); > - ifq->ifq_len--; > - *mp = m; > - break; > - } > - > - (*sleeping)++; > - error = msleep_nsec(ifq, &ifq->ifq_mtx, > - priority, wmesg, INFSLP); > - (*sleeping)--; > - if (error != 0) > - break; > - if (!(*alive)) { > - error = EIO; > - break; > - } > - } > - } > - ifq_deq_leave(ifq); > - > - return (error); > -} > - > -int > ifq_hdatalen(struct ifqueue *ifq) > { > struct mbuf *m; > Index: sys/net/ifq.h > =================================================================== > RCS file: /cvs/src/sys/net/ifq.h,v > diff -u -p -r1.43 ifq.h > --- sys/net/ifq.h 3 Feb 2025 08:58:52 -0000 1.43 > +++ sys/net/ifq.h 12 Feb 2025 15:35:04 -0000 > @@ -446,10 +446,6 @@ void ifq_barrier(struct ifqueue *); > void ifq_set_oactive(struct ifqueue *); > void ifq_deq_set_oactive(struct ifqueue *); > > -int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, > - const char *, volatile unsigned int *, > - volatile unsigned int *); > - > #define ifq_len(_ifq) READ_ONCE((_ifq)->ifq_len) > #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) > >