> On 13 Feb 2025, at 19:33, Claudio Jeker <cje...@diehard.n-r-g.com> wrote:
> 
> On Wed, Feb 12, 2025 at 06:47:36PM +0300, Vitaliy Makkoveev wrote:
>> Hi,
>> 
>> This diff serializes tun(4)/tap(4) interfaces creation and destruction
>> from ifconfig(8) and device nodes path. Not only simplifies, but fixes
>> syzkaller [1] report.
>> 
>> Can I ask openvpn or vmd(8) users to test it and prove that there is no
>> behavior changes?
> 
> Just to be clear the diff changes the way tun(4) and tap(4) interfaces are
> created and destroyed. So it matters on how the interface is created and
> if it does behave properly.

The original creation and destruction paths of tun(4) are full of
sleep points, so those paths have dances to protect half-created
or half-destroyed interface. This diff uses the dedicated rwlock
for cloners in the device nodes handlers to make races impossible.

> I think the things to test are:
> - If you open /dev/tun1 is tun1 created and if you close /dev/tun1 is
> it destroyed again.

It works.

> - If tun1 alreay exists on open of /dev/tun1 then the last close of
> /dev/tun1 should not destroy tun1.

This is not the current behaviour. The TUN_STAYUP exists to prevent
the concurrent destruction from tun_dev_close() while we are in the
middle of tun_dev_open(). This could happen from the VOP_REWOKE()
path of tun_clone_destroy(). In this case the interface will be
destroyed, but not by tun_dev_close().

> - If you have /dev/tun1 open and destroy tun1 (the interface) are you
> notified that /dev/tun1 is no longer available.
> 

Only tun_dev_read() should be notified, and yes, it works.

> I think there are other cases to consider, those are just the obvious ones
> that I remember. I glanced at the diff, it changes a hell of a lot and the
> bit that scares me is that TUN_STAYUP becomes unused and looking at
> tun_dev_close() I think this is indeed not right. In other words I think
> the 2nd bit from the list above is violated.

As I previously said, the TUN_STAYUP bit required only to prevent
concurrent destruction in the middle of tun_dev_open(). Now
concurrent destruction is impossible.

> 
>> 1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9
>> 
>> Index: sys/net/if.c
>> ===================================================================
>> RCS file: /cvs/src/sys/net/if.c,v
>> diff -u -p -r1.726 if.c
>> --- sys/net/if.c     3 Feb 2025 08:58:52 -0000       1.726
>> +++ sys/net/if.c     12 Feb 2025 15:35:04 -0000
>> @@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0,
>>  * Create a clone network interface.
>>  */
>> int
>> -if_clone_create(const char *name, int rdomain)
>> +if_clone_create_locked(const char *name, int rdomain)
>> {
>>      struct if_clone *ifc;
>>      struct ifnet *ifp;
>>      int unit, ret;
>> 
>> +    rw_assert_wrlock(&if_cloners_lock);
>> +
>>      ifc = if_clone_lookup(name, &unit);
>>      if (ifc == NULL)
>>              return (EINVAL);
>> -
>> -    rw_enter_write(&if_cloners_lock);
>> -
>>      if ((ifp = if_unit(name)) != NULL) {
>>              ret = EEXIST;
>> -            goto unlock;
>> +            goto put;
>>      }
>> 
>>      ret = (*ifc->ifc_create)(ifc, unit);
>> 
>>      if (ret != 0 || (ifp = if_unit(name)) == NULL)
>> -            goto unlock;
>> +            goto put;
>> 
>>      NET_LOCK();
>>      if_addgroup(ifp, ifc->ifc_name);
>>      if (rdomain != 0)
>>              if_setrdomain(ifp, rdomain);
>>      NET_UNLOCK();
>> -unlock:
>> -    rw_exit_write(&if_cloners_lock);
>> +put:
>>      if_put(ifp);
>> 
>>      return (ret);
>> +}
>> +
>> +int
>> +if_clone_create(const char *name, int rdomain)
>> +{
>> +    int error;
>> +
>> +    rw_enter_write(&if_cloners_lock);
>> +    error = if_clone_create_locked(name, rdomain);
>> +    rw_exit_write(&if_cloners_lock);
>> +
>> +    return (error);
>> }
>> 
>> /*
>> Index: sys/net/if_tun.c
>> ===================================================================
>> RCS file: /cvs/src/sys/net/if_tun.c,v
>> diff -u -p -r1.250 if_tun.c
>> --- sys/net/if_tun.c 30 Dec 2024 02:46:00 -0000      1.250
>> +++ sys/net/if_tun.c 12 Feb 2025 15:35:04 -0000
>> @@ -115,7 +115,7 @@ int      tundebug = TUN_DEBUG;
>> void tunattach(int);
>> 
>> int  tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
>> -int tun_dev_close(dev_t, struct proc *);
>> +int tun_dev_close(dev_t, const struct if_clone *, struct proc *);
>> int  tun_dev_ioctl(dev_t, u_long, void *);
>> int  tun_dev_read(dev_t, struct uio *, int);
>> int  tun_dev_write(dev_t, struct uio *, int, int);
>> @@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni
>> 
>>      sigio_init(&sc->sc_sigio);
>> 
>> -    /* tell tun_dev_open we're initialised */
>> -
>> -    sc->sc_flags |= TUN_INITED|TUN_STAYUP;
>> -    wakeup(sc);
>> -
>>      return (0);
>> 
>> exists:
>> @@ -303,25 +298,10 @@ int
>> tun_clone_destroy(struct ifnet *ifp)
>> {
>>      struct tun_softc        *sc = ifp->if_softc;
>> -    dev_t                    dev;
>> 
>>      KERNEL_ASSERT_LOCKED();
>> 
>> -    if (ISSET(sc->sc_flags, TUN_DEAD))
>> -            return (ENXIO);
>>      SET(sc->sc_flags, TUN_DEAD);
>> -
>> -    /* kick userland off the device */
>> -    dev = sc->sc_dev;
>> -    if (dev) {
>> -            struct vnode *vp;
>> -
>> -            if (vfinddev(dev, VCHR, &vp))
>> -                    VOP_REVOKE(vp, REVOKEALL);
>> -
>> -            KASSERT(sc->sc_dev == 0);
>> -    }
>> -
>>      /* prevent userland from getting to the device again */
>>      SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
>>      smr_barrier();
>> @@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_
>>      struct tun_softc *sc;
>>      struct ifnet *ifp;
>>      int error;
>> -    u_short stayup = 0;
>> -    struct vnode *vp;
>> 
>>      char name[IFNAMSIZ];
>>      unsigned int rdomain;
>> 
>> -    /*
>> -     * Find the vnode associated with this open before we sleep
>> -     * and let something else revoke it. Our caller has a reference
>> -     * to it so we don't need to account for it.
>> -     */
>> -    if (!vfinddev(dev, VCHR, &vp))
>> -            panic("%s vfinddev failed", __func__);
>> -
>>      snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
>> -    rdomain = rtable_l2(p->p_p->ps_rtableid);
>> 
>> -    /* let's find or make an interface to work with */
>> -    while ((sc = tun_name_lookup(name)) == NULL) {
>> -            error = if_clone_create(name, rdomain);
>> +    rw_enter_write(&if_cloners_lock);
>> +
>> +    if ((ifp = if_unit(name)) == NULL) {
>> +            rdomain = rtable_l2(p->p_p->ps_rtableid);
>> +            error = if_clone_create_locked(name, rdomain);
>>              switch (error) {
>> -            case 0: /* it's probably ours */
>> -                    stayup = TUN_STAYUP;
>> -                    /* FALLTHROUGH */
>> -            case EEXIST: /* we may have lost a race with someone else */
>> +            case 0:
>> +            case EEXIST:
>>                      break;
>>              default:
>> -                    return (error);
>> +                    goto out;
>>              }
>> -    }
>> -
>> -    refcnt_take(&sc->sc_refs);
>> 
>> -    /* wait for it to be fully constructed before we use it */
>> -    for (;;) {
>> -            if (ISSET(sc->sc_flags, TUN_DEAD)) {
>> -                    error = ENXIO;
>> -                    goto done;
>> -            }
>> -
>> -            if (ISSET(sc->sc_flags, TUN_INITED))
>> -                    break;
>> -
>> -            error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
>> -            if (error != 0) {
>> -                    /* XXX if_clone_destroy if stayup? */
>> -                    goto done;
>> -            }
>> +            ifp = if_unit(name);
>> +            KASSERT(ifp != NULL);
>>      }
>> 
>> -    /* Has tun_clone_destroy torn the rug out under us? */
>> -    if (vp->v_type == VBAD) {
>> -            error = ENXIO;
>> -            goto done;
>> -    }
>> +    sc = ifp->if_softc;
>> 
>> -    if (sc->sc_dev != 0) {
>> -            /* aww, we lost */
>> +    if (sc->sc_dev) {
>>              error = EBUSY;
>> -            goto done;
>> +            goto out;
>>      }
>> -    /* it's ours now */
>>      sc->sc_dev = dev;
>> -    CLR(sc->sc_flags, stayup);
>> 
>> -    /* automatically mark the interface running on open */
>> -    ifp = &sc->sc_if;
>>      NET_LOCK();
>>      SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
>>      NET_UNLOCK();
>>      tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
>> -    error = 0;
>> 
>> -done:
>> -    tun_put(sc);
>> +out:
>> +    rw_exit_write(&if_cloners_lock);
>> +    if_put(ifp);
>> +
>>      return (error);
>> }
>> 
>> -/*
>> - * tunclose - close the device; if closing the real device, flush pending
>> - *  output and unless STAYUP bring down and destroy the interface.
>> - */
>> int
>> tunclose(dev_t dev, int flag, int mode, struct proc *p)
>> {
>> -    return (tun_dev_close(dev, p));
>> +    return (tun_dev_close(dev, &tun_cloner, p));
>> }
>> 
>> int
>> tapclose(dev_t dev, int flag, int mode, struct proc *p)
>> {
>> -    return (tun_dev_close(dev, p));
>> +    return (tun_dev_close(dev, &tap_cloner, p));
>> }
>> 
>> int
>> -tun_dev_close(dev_t dev, struct proc *p)
>> +tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p)
>> {
>> -    struct tun_softc        *sc;
>> -    struct ifnet            *ifp;
>> -    int                      error = 0;
>> -    char                     name[IFNAMSIZ];
>> -    int                      destroy = 0;
>> -
>> -    sc = tun_get(dev);
>> -    if (sc == NULL)
>> -            return (ENXIO);
>> -
>> -    ifp = &sc->sc_if;
>> -
>> -    /*
>> -     * junk all pending output
>> -     */
>> -    NET_LOCK();
>> -    CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
>> -    CLR(ifp->if_capabilities, TUN_IF_CAPS);
>> -    NET_UNLOCK();
>> -    ifq_purge(&ifp->if_snd);
>> -
>> -    CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR);
>> -    sigio_free(&sc->sc_sigio);
>> -
>> -    if (!ISSET(sc->sc_flags, TUN_DEAD)) {
>> -            /* we can't hold a reference to sc before we start a dtor */
>> -            if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
>> -                    destroy = 1;
>> -                    strlcpy(name, ifp->if_xname, sizeof(name));
>> -            } else {
>> -                    tun_link_state(ifp, LINK_STATE_DOWN);
>> -            }
>> -    }
>> -
>> -    sc->sc_dev = 0;
>> -
>> -    tun_put(sc);
>> +    char name[IFNAMSIZ];
>> 
>> -    if (destroy)
>> -            if_clone_destroy(name);
>> +    snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
>> +    if_clone_destroy(name);
>> 
>> -    return (error);
>> +    return (0);
>> }
>> 
>> /*
>> @@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio,
>> 
>>      ifp = &sc->sc_if;
>> 
>> -    error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
>> -        (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
>> -    if (error != 0)
>> -            goto put;
>> +    while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) {
>> +            if (ISSET(ioflag, IO_NDELAY)) {
>> +                    error = EWOULDBLOCK;
>> +                    goto put;
>> +            }
>> +
>> +            sc->sc_reading = 1;
>> +            error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH,
>> +                "tunread", INFSLP);
>> +            sc->sc_reading = 0;
>> +
>> +            if (error != 0)
>> +                    goto put;
>> +            if (ISSET(sc->sc_flags, TUN_DEAD)) {
>> +                    error = ENXIO;
>> +                    goto put;
>> +            }
>> +    }
>> 
>> #if NBPFILTER > 0
>>      if (ifp->if_bpf)
>> Index: sys/net/if_tun.h
>> ===================================================================
>> RCS file: /cvs/src/sys/net/if_tun.h,v
>> diff -u -p -r1.18 if_tun.h
>> --- sys/net/if_tun.h 17 Nov 2024 00:25:07 -0000      1.18
>> +++ sys/net/if_tun.h 12 Feb 2025 15:35:04 -0000
>> @@ -75,7 +75,7 @@ struct tun_hdr {
>> };
>> 
>> #define      TUN_OPEN        0x0001
>> -#define     TUN_INITED      0x0002
>> +#define     TUN_INITED      0x0002  /* unused */
>> #define      TUN_RCOLL       0x0004  /* unused */
>> #define      TUN_IASET       0x0008
>> #define      TUN_DSTADDR     0x0010
>> @@ -83,7 +83,7 @@ struct tun_hdr {
>> #define      TUN_ASYNC       0x0080
>> #define      TUN_NBIO        0x0100
>> #define TUN_BRDADDR  0x0200
>> -#define TUN_STAYUP  0x0400
>> +#define TUN_STAYUP  0x0400  /* unused */
>> #define TUN_LAYER2   0x0800
>> 
>> #define      TUN_READY       (TUN_OPEN | TUN_INITED)
>> Index: sys/net/if_var.h
>> ===================================================================
>> RCS file: /cvs/src/sys/net/if_var.h,v
>> diff -u -p -r1.135 if_var.h
>> --- sys/net/if_var.h 24 Jan 2025 09:19:07 -0000      1.135
>> +++ sys/net/if_var.h 12 Feb 2025 15:35:04 -0000
>> @@ -325,6 +325,7 @@ int              niq_enlist(struct niqueue *, struct
>>     sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
>> 
>> extern struct ifnet_head ifnetlist;
>> +extern struct rwlock if_cloners_lock;
>> 
>> void if_start(struct ifnet *);
>> int  if_enqueue(struct ifnet *, struct mbuf *);
>> @@ -355,6 +356,7 @@ int      if_isconnected(const struct ifnet *,
>> 
>> void if_clone_attach(struct if_clone *);
>> 
>> +int if_clone_create_locked(const char *, int);
>> int  if_clone_create(const char *, int);
>> int  if_clone_destroy(const char *);
>> 
>> Index: sys/net/ifq.c
>> ===================================================================
>> RCS file: /cvs/src/sys/net/ifq.c,v
>> diff -u -p -r1.56 ifq.c
>> --- sys/net/ifq.c    3 Feb 2025 08:58:52 -0000       1.56
>> +++ sys/net/ifq.c    12 Feb 2025 15:35:04 -0000
>> @@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq)
>> }
>> 
>> int
>> -ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
>> -    const char *wmesg, volatile unsigned int *sleeping,
>> -    volatile unsigned int *alive)
>> -{
>> -    struct mbuf *m;
>> -    void *cookie;
>> -    int error = 0;
>> -
>> -    ifq_deq_enter(ifq);
>> -    if (ifq->ifq_len == 0 && nbio)
>> -            error = EWOULDBLOCK;
>> -    else {
>> -            for (;;) {
>> -                    m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
>> -                    if (m != NULL) {
>> -                            ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
>> -                            ifq->ifq_len--;
>> -                            *mp = m;
>> -                            break;
>> -                    }
>> -
>> -                    (*sleeping)++;
>> -                    error = msleep_nsec(ifq, &ifq->ifq_mtx,
>> -                        priority, wmesg, INFSLP);
>> -                    (*sleeping)--;
>> -                    if (error != 0)
>> -                            break;
>> -                    if (!(*alive)) {
>> -                            error = EIO;
>> -                            break;
>> -                    }
>> -            }
>> -    }
>> -    ifq_deq_leave(ifq);
>> -
>> -    return (error);
>> -}
>> -
>> -int
>> ifq_hdatalen(struct ifqueue *ifq)
>> {
>>      struct mbuf *m;
>> Index: sys/net/ifq.h
>> ===================================================================
>> RCS file: /cvs/src/sys/net/ifq.h,v
>> diff -u -p -r1.43 ifq.h
>> --- sys/net/ifq.h    3 Feb 2025 08:58:52 -0000       1.43
>> +++ sys/net/ifq.h    12 Feb 2025 15:35:04 -0000
>> @@ -446,10 +446,6 @@ void             ifq_barrier(struct ifqueue *);
>> void          ifq_set_oactive(struct ifqueue *);
>> void          ifq_deq_set_oactive(struct ifqueue *);
>> 
>> -int          ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
>> -                 const char *, volatile unsigned int *,
>> -                 volatile unsigned int *);
>> -
>> #define ifq_len(_ifq)                READ_ONCE((_ifq)->ifq_len)
>> #define ifq_empty(_ifq)              (ifq_len(_ifq) == 0)
>> 
>> 
> 
> -- 
> :wq Claudio

Reply via email to