On Wed, Feb 12, 2025 at 06:47:36PM +0300, Vitaliy Makkoveev wrote:
> Hi,
> 
> This diff serializes tun(4)/tap(4) interfaces creation and destruction
> from ifconfig(8) and device nodes path. Not only simplifies, but fixes
> syzkaller [1] report.
> 
> Can I ask openvpn or vmd(8) users to test it and prove that there is no
> behavior changes?
> 

Testing on amd64 - doesn't seem to cause any change in behavior for vmd(8)
guests using tap(4) with veb(4)/vport(4).

> 1. https://syzkaller.appspot.com/bug?extid=171f3fbee35ba6030ee9
> 
> Index: sys/net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> diff -u -p -r1.726 if.c
> --- sys/net/if.c      3 Feb 2025 08:58:52 -0000       1.726
> +++ sys/net/if.c      12 Feb 2025 15:35:04 -0000
> @@ -1310,38 +1310,48 @@ if_isconnected(const struct ifnet *ifp0,
>   * Create a clone network interface.
>   */
>  int
> -if_clone_create(const char *name, int rdomain)
> +if_clone_create_locked(const char *name, int rdomain)
>  {
>       struct if_clone *ifc;
>       struct ifnet *ifp;
>       int unit, ret;
>  
> +     rw_assert_wrlock(&if_cloners_lock);
> +
>       ifc = if_clone_lookup(name, &unit);
>       if (ifc == NULL)
>               return (EINVAL);
> -
> -     rw_enter_write(&if_cloners_lock);
> -
>       if ((ifp = if_unit(name)) != NULL) {
>               ret = EEXIST;
> -             goto unlock;
> +             goto put;
>       }
>  
>       ret = (*ifc->ifc_create)(ifc, unit);
>  
>       if (ret != 0 || (ifp = if_unit(name)) == NULL)
> -             goto unlock;
> +             goto put;
>  
>       NET_LOCK();
>       if_addgroup(ifp, ifc->ifc_name);
>       if (rdomain != 0)
>               if_setrdomain(ifp, rdomain);
>       NET_UNLOCK();
> -unlock:
> -     rw_exit_write(&if_cloners_lock);
> +put:
>       if_put(ifp);
>  
>       return (ret);
> +}
> +
> +int
> +if_clone_create(const char *name, int rdomain)
> +{
> +     int error;
> +
> +     rw_enter_write(&if_cloners_lock);
> +     error = if_clone_create_locked(name, rdomain);
> +     rw_exit_write(&if_cloners_lock);
> +
> +     return (error);
>  }
>  
>  /*
> Index: sys/net/if_tun.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_tun.c,v
> diff -u -p -r1.250 if_tun.c
> --- sys/net/if_tun.c  30 Dec 2024 02:46:00 -0000      1.250
> +++ sys/net/if_tun.c  12 Feb 2025 15:35:04 -0000
> @@ -115,7 +115,7 @@ int       tundebug = TUN_DEBUG;
>  void tunattach(int);
>  
>  int  tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
> -int  tun_dev_close(dev_t, struct proc *);
> +int  tun_dev_close(dev_t, const struct if_clone *, struct proc *);
>  int  tun_dev_ioctl(dev_t, u_long, void *);
>  int  tun_dev_read(dev_t, struct uio *, int);
>  int  tun_dev_write(dev_t, struct uio *, int, int);
> @@ -285,11 +285,6 @@ tun_create(struct if_clone *ifc, int uni
>  
>       sigio_init(&sc->sc_sigio);
>  
> -     /* tell tun_dev_open we're initialised */
> -
> -     sc->sc_flags |= TUN_INITED|TUN_STAYUP;
> -     wakeup(sc);
> -
>       return (0);
>  
>  exists:
> @@ -303,25 +298,10 @@ int
>  tun_clone_destroy(struct ifnet *ifp)
>  {
>       struct tun_softc        *sc = ifp->if_softc;
> -     dev_t                    dev;
>  
>       KERNEL_ASSERT_LOCKED();
>  
> -     if (ISSET(sc->sc_flags, TUN_DEAD))
> -             return (ENXIO);
>       SET(sc->sc_flags, TUN_DEAD);
> -
> -     /* kick userland off the device */
> -     dev = sc->sc_dev;
> -     if (dev) {
> -             struct vnode *vp;
> -
> -             if (vfinddev(dev, VCHR, &vp))
> -                     VOP_REVOKE(vp, REVOKEALL);
> -
> -             KASSERT(sc->sc_dev == 0);
> -     }
> -
>       /* prevent userland from getting to the device again */
>       SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
>       smr_barrier();
> @@ -390,145 +370,70 @@ tun_dev_open(dev_t dev, const struct if_
>       struct tun_softc *sc;
>       struct ifnet *ifp;
>       int error;
> -     u_short stayup = 0;
> -     struct vnode *vp;
>  
>       char name[IFNAMSIZ];
>       unsigned int rdomain;
>  
> -     /*
> -      * Find the vnode associated with this open before we sleep
> -      * and let something else revoke it. Our caller has a reference
> -      * to it so we don't need to account for it.
> -      */
> -     if (!vfinddev(dev, VCHR, &vp))
> -             panic("%s vfinddev failed", __func__);
> -
>       snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
> -     rdomain = rtable_l2(p->p_p->ps_rtableid);
>  
> -     /* let's find or make an interface to work with */
> -     while ((sc = tun_name_lookup(name)) == NULL) {
> -             error = if_clone_create(name, rdomain);
> +     rw_enter_write(&if_cloners_lock);
> +
> +     if ((ifp = if_unit(name)) == NULL) {
> +             rdomain = rtable_l2(p->p_p->ps_rtableid);
> +             error = if_clone_create_locked(name, rdomain);
>               switch (error) {
> -             case 0: /* it's probably ours */
> -                     stayup = TUN_STAYUP;
> -                     /* FALLTHROUGH */
> -             case EEXIST: /* we may have lost a race with someone else */
> +             case 0:
> +             case EEXIST:
>                       break;
>               default:
> -                     return (error);
> +                     goto out;
>               }
> -     }
> -
> -     refcnt_take(&sc->sc_refs);
>  
> -     /* wait for it to be fully constructed before we use it */
> -     for (;;) {
> -             if (ISSET(sc->sc_flags, TUN_DEAD)) {
> -                     error = ENXIO;
> -                     goto done;
> -             }
> -
> -             if (ISSET(sc->sc_flags, TUN_INITED))
> -                     break;
> -
> -             error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
> -             if (error != 0) {
> -                     /* XXX if_clone_destroy if stayup? */
> -                     goto done;
> -             }
> +             ifp = if_unit(name);
> +             KASSERT(ifp != NULL);
>       }
>  
> -     /* Has tun_clone_destroy torn the rug out under us? */
> -     if (vp->v_type == VBAD) {
> -             error = ENXIO;
> -             goto done;
> -     }
> +     sc = ifp->if_softc;
>  
> -     if (sc->sc_dev != 0) {
> -             /* aww, we lost */
> +     if (sc->sc_dev) {
>               error = EBUSY;
> -             goto done;
> +             goto out;
>       }
> -     /* it's ours now */
>       sc->sc_dev = dev;
> -     CLR(sc->sc_flags, stayup);
>  
> -     /* automatically mark the interface running on open */
> -     ifp = &sc->sc_if;
>       NET_LOCK();
>       SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
>       NET_UNLOCK();
>       tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
> -     error = 0;
>  
> -done:
> -     tun_put(sc);
> +out:
> +     rw_exit_write(&if_cloners_lock);
> +     if_put(ifp);
> +
>       return (error);
>  }
>  
> -/*
> - * tunclose - close the device; if closing the real device, flush pending
> - *  output and unless STAYUP bring down and destroy the interface.
> - */
>  int
>  tunclose(dev_t dev, int flag, int mode, struct proc *p)
>  {
> -     return (tun_dev_close(dev, p));
> +     return (tun_dev_close(dev, &tun_cloner, p));
>  }
>  
>  int
>  tapclose(dev_t dev, int flag, int mode, struct proc *p)
>  {
> -     return (tun_dev_close(dev, p));
> +     return (tun_dev_close(dev, &tap_cloner, p));
>  }
>  
>  int
> -tun_dev_close(dev_t dev, struct proc *p)
> +tun_dev_close(dev_t dev, const struct if_clone *ifc, struct proc *p)
>  {
> -     struct tun_softc        *sc;
> -     struct ifnet            *ifp;
> -     int                      error = 0;
> -     char                     name[IFNAMSIZ];
> -     int                      destroy = 0;
> -
> -     sc = tun_get(dev);
> -     if (sc == NULL)
> -             return (ENXIO);
> -
> -     ifp = &sc->sc_if;
> -
> -     /*
> -      * junk all pending output
> -      */
> -     NET_LOCK();
> -     CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
> -     CLR(ifp->if_capabilities, TUN_IF_CAPS);
> -     NET_UNLOCK();
> -     ifq_purge(&ifp->if_snd);
> -
> -     CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR);
> -     sigio_free(&sc->sc_sigio);
> -
> -     if (!ISSET(sc->sc_flags, TUN_DEAD)) {
> -             /* we can't hold a reference to sc before we start a dtor */
> -             if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
> -                     destroy = 1;
> -                     strlcpy(name, ifp->if_xname, sizeof(name));
> -             } else {
> -                     tun_link_state(ifp, LINK_STATE_DOWN);
> -             }
> -     }
> -
> -     sc->sc_dev = 0;
> -
> -     tun_put(sc);
> +     char name[IFNAMSIZ];
>  
> -     if (destroy)
> -             if_clone_destroy(name);
> +     snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
> +     if_clone_destroy(name);
>  
> -     return (error);
> +     return (0);
>  }
>  
>  /*
> @@ -831,10 +736,24 @@ tun_dev_read(dev_t dev, struct uio *uio,
>  
>       ifp = &sc->sc_if;
>  
> -     error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
> -         (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
> -     if (error != 0)
> -             goto put;
> +     while ((m0 = ifq_dequeue(&ifp->if_snd)) == NULL) {
> +             if (ISSET(ioflag, IO_NDELAY)) {
> +                     error = EWOULDBLOCK;
> +                     goto put;
> +             }
> +
> +             sc->sc_reading = 1;
> +             error = tsleep_nsec(&ifp->if_snd, (PZERO + 1) | PCATCH,
> +                 "tunread", INFSLP);
> +             sc->sc_reading = 0;
> +
> +             if (error != 0)
> +                     goto put;
> +             if (ISSET(sc->sc_flags, TUN_DEAD)) {
> +                     error = ENXIO;
> +                     goto put;
> +             }
> +     }
>  
>  #if NBPFILTER > 0
>       if (ifp->if_bpf)
> Index: sys/net/if_tun.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_tun.h,v
> diff -u -p -r1.18 if_tun.h
> --- sys/net/if_tun.h  17 Nov 2024 00:25:07 -0000      1.18
> +++ sys/net/if_tun.h  12 Feb 2025 15:35:04 -0000
> @@ -75,7 +75,7 @@ struct tun_hdr {
>  };
>  
>  #define      TUN_OPEN        0x0001
> -#define      TUN_INITED      0x0002
> +#define      TUN_INITED      0x0002  /* unused */
>  #define      TUN_RCOLL       0x0004  /* unused */
>  #define      TUN_IASET       0x0008
>  #define      TUN_DSTADDR     0x0010
> @@ -83,7 +83,7 @@ struct tun_hdr {
>  #define      TUN_ASYNC       0x0080
>  #define      TUN_NBIO        0x0100
>  #define TUN_BRDADDR  0x0200
> -#define TUN_STAYUP   0x0400
> +#define TUN_STAYUP   0x0400  /* unused */
>  #define TUN_LAYER2   0x0800
>  
>  #define      TUN_READY       (TUN_OPEN | TUN_INITED)
> Index: sys/net/if_var.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_var.h,v
> diff -u -p -r1.135 if_var.h
> --- sys/net/if_var.h  24 Jan 2025 09:19:07 -0000      1.135
> +++ sys/net/if_var.h  12 Feb 2025 15:35:04 -0000
> @@ -325,6 +325,7 @@ int               niq_enlist(struct niqueue *, struct
>      sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
>  
>  extern struct ifnet_head ifnetlist;
> +extern struct rwlock if_cloners_lock;
>  
>  void if_start(struct ifnet *);
>  int  if_enqueue(struct ifnet *, struct mbuf *);
> @@ -355,6 +356,7 @@ int       if_isconnected(const struct ifnet *,
>  
>  void if_clone_attach(struct if_clone *);
>  
> +int  if_clone_create_locked(const char *, int);
>  int  if_clone_create(const char *, int);
>  int  if_clone_destroy(const char *);
>  
> Index: sys/net/ifq.c
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.c,v
> diff -u -p -r1.56 ifq.c
> --- sys/net/ifq.c     3 Feb 2025 08:58:52 -0000       1.56
> +++ sys/net/ifq.c     12 Feb 2025 15:35:04 -0000
> @@ -478,45 +478,6 @@ ifq_dequeue(struct ifqueue *ifq)
>  }
>  
>  int
> -ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
> -    const char *wmesg, volatile unsigned int *sleeping,
> -    volatile unsigned int *alive)
> -{
> -     struct mbuf *m;
> -     void *cookie;
> -     int error = 0;
> -
> -     ifq_deq_enter(ifq);
> -     if (ifq->ifq_len == 0 && nbio)
> -             error = EWOULDBLOCK;
> -     else {
> -             for (;;) {
> -                     m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
> -                     if (m != NULL) {
> -                             ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
> -                             ifq->ifq_len--;
> -                             *mp = m;
> -                             break;
> -                     }
> -
> -                     (*sleeping)++;
> -                     error = msleep_nsec(ifq, &ifq->ifq_mtx,
> -                         priority, wmesg, INFSLP);
> -                     (*sleeping)--;
> -                     if (error != 0)
> -                             break;
> -                     if (!(*alive)) {
> -                             error = EIO;
> -                             break;
> -                     }
> -             }
> -     }
> -     ifq_deq_leave(ifq);
> -
> -     return (error);
> -}
> -
> -int
>  ifq_hdatalen(struct ifqueue *ifq)
>  {
>       struct mbuf *m;
> Index: sys/net/ifq.h
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.h,v
> diff -u -p -r1.43 ifq.h
> --- sys/net/ifq.h     3 Feb 2025 08:58:52 -0000       1.43
> +++ sys/net/ifq.h     12 Feb 2025 15:35:04 -0000
> @@ -446,10 +446,6 @@ void              ifq_barrier(struct ifqueue *);
>  void          ifq_set_oactive(struct ifqueue *);
>  void          ifq_deq_set_oactive(struct ifqueue *);
>  
> -int           ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
> -                  const char *, volatile unsigned int *,
> -                  volatile unsigned int *);
> -
>  #define ifq_len(_ifq)                READ_ONCE((_ifq)->ifq_len)
>  #define ifq_empty(_ifq)              (ifq_len(_ifq) == 0)
>  
> 

Reply via email to