hello,

On 2023-12-07 18:14, Vitaliy Makkoveev wrote:
> On Thu, Nov 30, 2023 at 04:51:06PM +0300, Vitaliy Makkoveev wrote:
> > On Thu, Nov 30, 2023 at 08:44:19AM +0100, Alexandr Nedvedicky wrote:
> > > Hello Johan,
> > > 
> > > On Wed, Nov 29, 2023 at 11:24:59PM -0500, Johan Huldtgren wrote:
> > > > 
> > > > so my machine paniced today, but the panic this time is completely 
> > > > different.
> > > > I don't know if it's related to this issue, the patch, or a completely 
> > > > new
> > > > issue, but I figured I'd start reporting it here. Unfortuntately when I 
> > > > tried
> > > > to swap CPU to collect traces from the other ones the machine froze and 
> > > > I was
> > > > forced to power cycle it. So I have the panic and initial trace but 
> > > > that's it. 
> > > > 
> > > > panic: ip_output no HDR
> > > > Stopped at      db_enter+0x14:  popq    %rbp
> > > >     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
> > > >   74003  25022      0        0x10          0    2  afpd
> > > >  355827  29745    107   0x1100002  0x4000000    3  vmd
> > > >  451006  29745    107   0x1100002  0x4000000    4  vmd
> > > >  131508  78367    107   0x1100002  0x4000000    5  vmd
> > > >  112644  78367    107   0x1100002  0x4000000    1  vmd
> > > > *133058  91446      0     0x14000      0x200    0  softnet0
> > > > db_enter() at db_enter+0x14
> > > > panic(ffffffff820c20df) at panic+0xc3
> > > > ip_output(fffffd8076b76e00,0,fffffd9c9e59e708,0,0,fffffd9c9e59e690,e4a23bf8c0204936)
> > > >  at ip_output+0xa26
> > > > udp_output(fffffd9c9e59e690,fffffd8076b76e00,fffffd8079d14b00,0) at 
> > > > udp_output+0x3be
> > > > sosend(fffffd9c9e59f000,fffffd8079d14b00,0,fffffd8076b76e00,0,0) at 
> > > > sosend+0x37f
> > > > pflow_output_process(ffff8000011a0800) at pflow_output_process+0x67
> > > > taskq_thread(ffff800000035200) at taskq_thread+0x100
> > > > end trace frame: 0x0, count: 8
> > > > https://www.openbsd.org/ddb.html describes the minimum info required in 
> > > > bug
> > > > reports.  Insufficient info makes it difficult to find and fix bugs.
> > > > ddb{0}>
> > > > 
> > > > ddb{0}> show panic
> > > > *cpu0: ip_output no HDR
> > > > 
> > > > ddb{0}> trace
> > > > db_enter() at db_enter+0x14
> > > > panic(ffffffff820c20df) at panic+0xc3
> > > > ip_output(fffffd8076b76e00,0,fffffd9c9e59e708,0,0,fffffd9c9e59e690,e4a23bf8c0204936)
> > > >  at ip_output+0xa26
> > > > udp_output(fffffd9c9e59e690,fffffd8076b76e00,fffffd8079d14b00,0) at 
> > > > udp_output+0x3be
> > > > sosend(fffffd9c9e59f000,fffffd8079d14b00,0,fffffd8076b76e00,0,0) at 
> > > > sosend+0x37f
> > > > pflow_output_process(ffff8000011a0800) at pflow_output_process+0x67
> > > > taskq_thread(ffff800000035200) at taskq_thread+0x100
> > > > end trace frame: 0x0, count: -7
> > > > 
> > > 
> > >     This is a different issue to what we were seeing. The panic indicates
> > >     the ip_output() function deals with packet buffer which contains no
> > >     ip header. How it could happen that's the question...
> > > 
> > 
> > I found the reason of that panic. The `sc_mbuf{,6}' cumulative mbuf(9)
> > of pflow_softc structure has missing protection. So it was overwritten
> > concurrently with pflow_sendout_*(). I will fix this later.
> >
> 
> Here id the diff. I introduces `sc_mtx' mutex(9) to protect the most of 
> pflow_softc structure. The `send_nam', `sc_flowsrc' and `sc_flowdst' are
> prtected by `sc_lock' rwlock(9). `sc_tmpl_ipfix' is immutable.
> 
> Also, the pflow_sendout_ipfix_tmpl() calls pflow_get_mbuf() with NULL
> instead of `sc'. This fix also included to this diff.
> 
> Please note, this diff does not fix all the problems in the pflow(4).
> The ifconfig create/destroy sequence could still break the kernel. I
> have ok'ed diff to fix this but did not commit it yet for some reason.
> Also the `pflowstats' data left unprotected. I will fix this with
> separate diff.
> 
> Please test this diff and let us know the result. I will continue with
> pflow(4) after committing this.

Would this apply against 7.4 or only -current? Machine is currently
running 7.4+syspatches (and sashan@ patch from earlier in this thread)
if I need to get on -current that's not a big deal it would just take
a few more days once I get back home tomorrow.

thanks,

.jh

> Index: sys/net/if_pflow.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pflow.c,v
> retrieving revision 1.100
> diff -u -p -r1.100 if_pflow.c
> --- sys/net/if_pflow.c        9 Nov 2023 08:53:20 -0000       1.100
> +++ sys/net/if_pflow.c        7 Dec 2023 14:58:18 -0000
> @@ -29,6 +29,7 @@
>  #include <sys/kernel.h>
>  #include <sys/socketvar.h>
>  #include <sys/sysctl.h>
> +#include <sys/mutex.h>
>  
>  #include <net/if.h>
>  #include <net/if_types.h>
> @@ -147,6 +148,7 @@ pflow_clone_create(struct if_clone *ifc,
>  
>       pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
>       rw_init(&pflowif->sc_lock, "pflowlk");
> +     mtx_init(&pflowif->sc_mtx, IPL_MPFLOOR);
>       MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
>       pflowif->sc_version = PFLOW_PROTO_DEFAULT;
>  
> @@ -347,6 +349,8 @@ pflow_set(struct pflow_softc *sc, struct
>               }
>       }
>  
> +     rw_assert_wrlock(&sc->sc_lock);
> +
>       pflow_flush(sc);
>  
>       if (pflowr->addrmask & PFLOW_MASK_DSTIP) {
> @@ -461,6 +465,8 @@ pflow_set(struct pflow_softc *sc, struct
>               sc->so = NULL;
>       }
>  
> +     mtx_enter(&sc->sc_mtx);
> +
>       /* error check is above */
>       if (pflowr->addrmask & PFLOW_MASK_VERSION)
>               sc->sc_version = pflowr->version;
> @@ -479,6 +485,8 @@ pflow_set(struct pflow_softc *sc, struct
>               break;
>       }
>  
> +     mtx_leave(&sc->sc_mtx);
> +
>       return (0);
>  }
>  
> @@ -504,10 +512,12 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>               NET_LOCK();
>               if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
>                       ifp->if_flags |= IFF_RUNNING;
> +                     mtx_enter(&sc->sc_mtx);
>                       sc->sc_gcounter=pflowstats.pflow_flows;
>                       /* send templates on startup */
>                       if (sc->sc_version == PFLOW_PROTO_10)
>                               pflow_sendout_ipfix_tmpl(sc);
> +                     mtx_leave(&sc->sc_mtx);
>               } else
>                       ifp->if_flags &= ~IFF_RUNNING;
>               rw_exit_read(&sc->sc_lock);
> @@ -519,19 +529,28 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>                       ifr->ifr_mtu = MCLBYTES;
>               if (ifr->ifr_mtu < ifp->if_mtu)
>                       pflow_flush(sc);
> +             mtx_enter(&sc->sc_mtx);
>               pflow_setmtu(sc, ifr->ifr_mtu);
> +             mtx_leave(&sc->sc_mtx);
>               break;
>  
>       case SIOCGETPFLOW:
>               bzero(&pflowr, sizeof(pflowr));
>  
> +             /* XXXSMP: enforce lock order */
> +             NET_UNLOCK();
> +             rw_enter_read(&sc->sc_lock);
> +             NET_LOCK();
>               if (sc->sc_flowsrc != NULL)
>                       memcpy(&pflowr.flowsrc, sc->sc_flowsrc,
>                           sc->sc_flowsrc->sa_len);
>               if (sc->sc_flowdst != NULL)
>                       memcpy(&pflowr.flowdst, sc->sc_flowdst,
>                           sc->sc_flowdst->sa_len);
> +             rw_exit_read(&sc->sc_lock);
> +             mtx_enter(&sc->sc_mtx);
>               pflowr.version = sc->sc_version;
> +             mtx_leave(&sc->sc_mtx);
>  
>               if ((error = copyout(&pflowr, ifr->ifr_data,
>                   sizeof(pflowr))))
> @@ -557,9 +576,11 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>  
>               if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
>                       ifp->if_flags |= IFF_RUNNING;
> +                     mtx_enter(&sc->sc_mtx);
>                       sc->sc_gcounter=pflowstats.pflow_flows;
>                       if (sc->sc_version == PFLOW_PROTO_10)
>                               pflow_sendout_ipfix_tmpl(sc);
> +                     mtx_leave(&sc->sc_mtx);
>               } else
>                       ifp->if_flags &= ~IFF_RUNNING;
>               rw_exit_write(&sc->sc_lock);
> @@ -575,7 +596,6 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>  int
>  pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
>  {
> -
>       sc->sc_maxcount4 = (mtu - hdrsz -
>           sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
>       sc->sc_maxcount6 = (mtu - hdrsz -
> @@ -622,6 +642,8 @@ pflow_get_mbuf(struct pflow_softc *sc, u
>       struct pflow_header      h;
>       struct mbuf             *m;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       MGETHDR(m, M_DONTWAIT, MT_DATA);
>       if (m == NULL) {
>               pflowstats.pflow_onomem++;
> @@ -791,6 +813,7 @@ export_pflow(struct pf_state *st)
>       sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
>  
>       SLIST_FOREACH(sc, &pflowif_list, sc_next) {
> +             mtx_enter(&sc->sc_mtx);
>               switch (sc->sc_version) {
>               case PFLOW_PROTO_5:
>                       if( sk->af == AF_INET )
> @@ -803,6 +826,7 @@ export_pflow(struct pf_state *st)
>               default: /* NOTREACHED */
>                       break;
>               }
> +             mtx_leave(&sc->sc_mtx);
>       }
>  
>       return (0);
> @@ -864,6 +888,8 @@ copy_flow_to_m(struct pflow_flow *flow, 
>  {
>       int             ret = 0;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       if (sc->sc_mbuf == NULL) {
>               if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
>                       return (ENOBUFS);
> @@ -888,6 +914,8 @@ copy_flow_ipfix_4_to_m(struct pflow_ipfi
>  {
>       int             ret = 0;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       if (sc->sc_mbuf == NULL) {
>               if ((sc->sc_mbuf =
>                   pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
> @@ -915,6 +943,8 @@ copy_flow_ipfix_6_to_m(struct pflow_ipfi
>  {
>       int             ret = 0;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       if (sc->sc_mbuf6 == NULL) {
>               if ((sc->sc_mbuf6 =
>                   pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
> @@ -1011,6 +1041,7 @@ pflow_timeout(void *v)
>  {
>       struct pflow_softc      *sc = v;
>  
> +     mtx_enter(&sc->sc_mtx);
>       switch (sc->sc_version) {
>       case PFLOW_PROTO_5:
>               pflow_sendout_v5(sc);
> @@ -1021,6 +1052,7 @@ pflow_timeout(void *v)
>       default: /* NOTREACHED */
>               break;
>       }
> +     mtx_leave(&sc->sc_mtx);
>  }
>  
>  void
> @@ -1028,7 +1060,9 @@ pflow_timeout6(void *v)
>  {
>       struct pflow_softc      *sc = v;
>  
> +     mtx_enter(&sc->sc_mtx);
>       pflow_sendout_ipfix(sc, AF_INET6);
> +     mtx_leave(&sc->sc_mtx);
>  }
>  
>  void
> @@ -1036,12 +1070,15 @@ pflow_timeout_tmpl(void *v)
>  {
>       struct pflow_softc      *sc = v;
>  
> +     mtx_enter(&sc->sc_mtx);
>       pflow_sendout_ipfix_tmpl(sc);
> +     mtx_leave(&sc->sc_mtx);
>  }
>  
>  void
>  pflow_flush(struct pflow_softc *sc)
>  {
> +     mtx_enter(&sc->sc_mtx);
>       switch (sc->sc_version) {
>       case PFLOW_PROTO_5:
>               pflow_sendout_v5(sc);
> @@ -1053,6 +1090,7 @@ pflow_flush(struct pflow_softc *sc)
>       default: /* NOTREACHED */
>               break;
>       }
> +     mtx_leave(&sc->sc_mtx);
>  }
>  
>  int
> @@ -1063,6 +1101,8 @@ pflow_sendout_v5(struct pflow_softc *sc)
>       struct ifnet            *ifp = &sc->sc_if;
>       struct timespec         tv;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       timeout_del(&sc->sc_tmo);
>  
>       if (m == NULL)
> @@ -1099,6 +1139,8 @@ pflow_sendout_ipfix(struct pflow_softc *
>       u_int32_t                        count;
>       int                              set_length;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       switch (af) {
>       case AF_INET:
>               m = sc->sc_mbuf;
> @@ -1158,12 +1200,14 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
>       struct pflow_v10_header         *h10;
>       struct ifnet                    *ifp = &sc->sc_if;
>  
> +     MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
> +
>       timeout_del(&sc->sc_tmo_tmpl);
>  
>       if (!(ifp->if_flags & IFF_RUNNING)) {
>               return (0);
>       }
> -     m = pflow_get_mbuf(NULL, 0);
> +     m = pflow_get_mbuf(sc, 0);
>       if (m == NULL)
>               return (0);
>       if (m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
> @@ -1196,6 +1240,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
>  int
>  pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
>  {
> +     rw_assert_anylock(&sc->sc_lock);
> +
>       counters_pkt(sc->sc_if.if_counters,
>                   ifc_opackets, ifc_obytes, m->m_pkthdr.len);
>  
> Index: sys/net/if_pflow.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pflow.h,v
> retrieving revision 1.19
> diff -u -p -r1.19 if_pflow.h
> --- sys/net/if_pflow.h        23 Nov 2022 15:12:27 -0000      1.19
> +++ sys/net/if_pflow.h        7 Dec 2023 14:58:18 -0000
> @@ -171,37 +171,42 @@ struct pflow_ipfix_flow6 {
>  
>  /*
>   * Locks used to protect struct members and global data
> + *       I       immutable after creation
>   *       N       net lock
> + *       m       this pflow_softc' `sc_mtx'
>   *       p       this pflow_softc' `sc_lock'
>   */
>  
>  struct pflow_softc {
> +     struct mutex             sc_mtx;
>       struct rwlock            sc_lock;
>  
>       int                      sc_dying;      /* [N] */
>       struct ifnet             sc_if;
>  
> -     unsigned int             sc_count;
> -     unsigned int             sc_count4;
> -     unsigned int             sc_count6;
> -     unsigned int             sc_maxcount;
> -     unsigned int             sc_maxcount4;
> -     unsigned int             sc_maxcount6;
> -     u_int64_t                sc_gcounter;
> -     u_int32_t                sc_sequence;
> +     unsigned int             sc_count;      /* [m] */
> +     unsigned int             sc_count4;     /* [m] */
> +     unsigned int             sc_count6;     /* [m] */
> +     unsigned int             sc_maxcount;   /* [m] */
> +     unsigned int             sc_maxcount4;  /* [m] */
> +     unsigned int             sc_maxcount6;  /* [m] */
> +     u_int64_t                sc_gcounter;   /* [m] */
> +     u_int32_t                sc_sequence;   /* [m] */
>       struct timeout           sc_tmo;
>       struct timeout           sc_tmo6;
>       struct timeout           sc_tmo_tmpl;
>       struct mbuf_queue        sc_outputqueue;
>       struct task              sc_outputtask;
>       struct socket           *so;            /* [p] */
> -     struct mbuf             *send_nam;
> -     struct sockaddr         *sc_flowsrc;
> -     struct sockaddr         *sc_flowdst;
> -     struct pflow_ipfix_tmpl  sc_tmpl_ipfix;
> -     u_int8_t                 sc_version;
> -     struct mbuf             *sc_mbuf;       /* current cumulative mbuf */
> -     struct mbuf             *sc_mbuf6;      /* current cumulative mbuf */
> +     struct mbuf             *send_nam;      /* [p] */
> +     struct sockaddr         *sc_flowsrc;    /* [p] */
> +     struct sockaddr         *sc_flowdst;    /* [p] */
> +     struct pflow_ipfix_tmpl  sc_tmpl_ipfix; /* [I] */
> +     u_int8_t                 sc_version;    /* [m] */
> +     struct mbuf             *sc_mbuf;       /* [m] current cumulative
> +                                                 mbuf */
> +     struct mbuf             *sc_mbuf6;      /* [m] current cumulative
> +                                                 mbuf */
>       SLIST_ENTRY(pflow_softc) sc_next;
>  };
>  
> 

Reply via email to