Hi Vitaliy

We still didn't have opportunity to test this. As soon as we can we will test 
it and let you know.

Srdačan pozdrav / Best regards
-- 
Nemanja Domazetović
Senior IT Network inženjer
Kappa Star Group,
Bulevar kneza Aleksandra Karađorđevića 36,
11000 Beograd,
Srbija
e-mail: nemanja.domazeto...@kappastar.com
web: https://www.kappastar.com
Čuvajte drveće. Nemojte štampati ovu poruku ako to nije neophodno. / Please 
consider the environment before printing this email.

-----Original Message-----
From: Vitaliy Makkoveev <m...@openbsd.org> 
Sent: Thursday, March 7, 2024 11:47 AM
To: Nemanja Domazetović <nemanja.domazeto...@kappastar.com>
Cc: bugs@openbsd.org; Claudio Jeker <clau...@openbsd.org>; Alexander Haensch 
<alexander.haen...@ipc.uni-tuebingen.de>; Vitaliy Makkoveev <o...@bsdbox.dev>
Subject: Re: wireguard problem?

On Tue, Mar 05, 2024 at 11:31:40AM +0300, Vitaliy Makkoveev wrote:
> 
> I suspect the races with wg_peer_destroy().
> 

Does this diff help?

Index: sys/dev/dt/dt_prov_static.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_prov_static.c,v
retrieving revision 1.22
diff -u -p -r1.22 dt_prov_static.c
--- sys/dev/dt/dt_prov_static.c 28 Aug 2023 14:50:01 -0000      1.22
+++ sys/dev/dt/dt_prov_static.c 23 Jan 2024 11:42:24 -0000
@@ -102,6 +102,7 @@ DT_STATIC_PROBE3(refcnt, inpcb, "void *"
 DT_STATIC_PROBE3(refcnt, rtentry, "void *", "int", "int");  
DT_STATIC_PROBE3(refcnt, syncache, "void *", "int", "int");  
DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int");
+DT_STATIC_PROBE3(refcnt, wg_peer, "void *", "int", "int");
 
 /*
  * List of all static probes
@@ -155,6 +156,7 @@ struct dt_probe *const dtps_static[] = {
        &_DT_STATIC_P(refcnt, rtentry),
        &_DT_STATIC_P(refcnt, syncache),
        &_DT_STATIC_P(refcnt, tdb),
+       &_DT_STATIC_P(refcnt, wg_peer),
 };
 
 struct dt_probe *const *dtps_index_refcnt;
Index: sys/net/if_wg.c
===================================================================
RCS file: /cvs/src/sys/net/if_wg.c,v
retrieving revision 1.36
diff -u -p -r1.36 if_wg.c
--- sys/net/if_wg.c     18 Jan 2024 08:46:41 -0000      1.36
+++ sys/net/if_wg.c     23 Jan 2024 11:42:27 -0000
@@ -30,6 +30,7 @@
 #include <sys/percpu.h>
 #include <sys/ioctl.h>
 #include <sys/mbuf.h>
+#include <sys/refcnt.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -190,6 +191,9 @@ struct wg_ring {
 struct wg_peer {
        LIST_ENTRY(wg_peer)      p_pubkey_entry;
        TAILQ_ENTRY(wg_peer)     p_seq_entry;
+
+       struct refcnt            p_refcnt;
+
        uint64_t                 p_id;
        struct wg_softc         *p_sc;
 
@@ -270,6 +274,7 @@ struct wg_peer *
        wg_peer_create(struct wg_softc *, uint8_t[WG_KEY_SIZE]);  struct 
wg_peer *
        wg_peer_lookup(struct wg_softc *, const uint8_t[WG_KEY_SIZE]);
+void   wg_peer_rele(struct wg_peer *);
 void   wg_peer_destroy(struct wg_peer *);
 void   wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *);
 void   wg_peer_set_sockaddr(struct wg_peer *, struct sockaddr *);
@@ -398,6 +403,8 @@ wg_peer_create(struct wg_softc *sc, uint
        if ((peer = pool_get(&wg_peer_pool, PR_NOWAIT)) == NULL)
                return NULL;
 
+       refcnt_init_trace(&peer->p_refcnt, DT_REFCNT_IDX_WGPEER);
+
        peer->p_id = peer_counter++;
        peer->p_sc = sc;
 
@@ -474,6 +481,22 @@ done:
 }
 
 void
+wg_peer_rele(struct wg_peer *peer)
+{
+       struct wg_softc         *sc = peer->p_sc;
+
+       if (refcnt_rele(&peer->p_refcnt) == 0)
+               return;
+
+       if (!mq_empty(&peer->p_stage_queue))
+               mq_purge(&peer->p_stage_queue);
+
+       DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
+       explicit_bzero(peer, sizeof(*peer));
+       pool_put(&wg_peer_pool, peer);
+}
+
+void
 wg_peer_destroy(struct wg_peer *peer)
 {
        struct wg_softc *sc = peer->p_sc;
@@ -507,31 +530,10 @@ wg_peer_destroy(struct wg_peer *peer)
 
        noise_remote_clear(&peer->p_remote);
 
-       NET_LOCK();
-       while (!ifq_empty(&sc->sc_if.if_snd)) {
-               /*
-                * XXX: `if_snd' of stopped interface could still
-                * contain packets
-                */
-               if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
-                       ifq_purge(&sc->sc_if.if_snd);
-                       continue;
-               }
-               NET_UNLOCK();
-               tsleep_nsec(&nowake, PWAIT, "wg_ifq", 1000);
-               NET_LOCK();
-       }
-       NET_UNLOCK();
-
        taskq_barrier(wg_crypt_taskq);
        taskq_barrier(net_tq(sc->sc_if.if_index));
 
-       if (!mq_empty(&peer->p_stage_queue))
-               mq_purge(&peer->p_stage_queue);
-
-       DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
-       explicit_bzero(peer, sizeof(*peer));
-       pool_put(&wg_peer_pool, peer);
+       wg_peer_rele(peer);
 }
 
 void
@@ -619,6 +621,7 @@ wg_aip_add(struct wg_softc *sc, struct w
        node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
 
        if (node == &aip->a_node) {
+               refcnt_take(&peer->p_refcnt);
                aip->a_peer = peer;
                aip->a_data = *d;
                LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry); @@ -627,9 +630,13 
@@ wg_aip_add(struct wg_softc *sc, struct w
                pool_put(&wg_aip_pool, aip);
                aip = (struct wg_aip *) node;
                if (aip->a_peer != peer) {
+                       struct wg_peer *peer_old = aip->a_peer;
+
+                       refcnt_take(&peer->p_refcnt);
                        LIST_REMOVE(aip, a_entry);
                        LIST_INSERT_HEAD(&peer->p_aip, aip, a_entry);
                        aip->a_peer = peer;
+                       wg_peer_rele(peer_old);
                }
        }
        rw_exit_write(&root->ar_lock);
@@ -641,11 +648,18 @@ wg_aip_lookup(struct art_root *root, voi  {
        struct srp_ref   sr;
        struct art_node *node;
+       struct wg_peer  *peer = NULL;
 
        node = art_match(root, addr, &sr);
+
+       if (node) {
+               peer = ((struct wg_aip *) node)->a_peer;
+               refcnt_take(&peer->p_refcnt);
+       }
+
        srp_leave(&sr);
 
-       return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
+       return peer;
 }
 
 int
@@ -678,6 +692,7 @@ wg_aip_remove(struct wg_softc *sc, struc
                sc->sc_aip_num--;
                LIST_REMOVE(aip, a_entry);
                pool_put(&wg_aip_pool, aip);
+               wg_peer_rele(peer);
        }
 
        srp_leave(&sr);
@@ -1672,6 +1687,9 @@ wg_decap(struct wg_softc *sc, struct mbu
                goto error;
        }
 
+       if (allowed_peer)
+               wg_peer_rele(allowed_peer);
+
        if (__predict_false(peer != allowed_peer)) {
                DPRINTF(sc, "Packet has unallowed src IP from peer "
                    "%llu\n", peer->p_id);
@@ -2092,7 +2110,6 @@ wg_qstart(struct ifqueue *ifq)
        struct ifnet            *ifp = ifq->ifq_if;
        struct wg_softc         *sc = ifp->if_softc;
        struct wg_peer          *peer;
-       struct wg_tag           *t;
        struct mbuf             *m;
        SLIST_HEAD(,wg_peer)     start_list;
 
@@ -2104,14 +2121,34 @@ wg_qstart(struct ifqueue *ifq)
         * time.
         */
        while ((m = ifq_dequeue(ifq)) != NULL) {
-               t = wg_tag_get(m);
-               peer = t->t_peer;
+               switch (m->m_pkthdr.ph_family) {
+               case AF_INET:
+                       peer = wg_aip_lookup(sc->sc_aip4,
+                           &mtod(m, struct ip *)->ip_dst);
+                       break;
+#ifdef INET6
+               case AF_INET6:
+                       peer = wg_aip_lookup(sc->sc_aip6,
+                           &mtod(m, struct ip6_hdr *)->ip6_dst);
+                       break;
+#endif
+               default:
+                       m_freem(m);
+                       continue;
+               }
+
+               if (peer == NULL) {
+                       m_freem(m);
+                       continue;
+               }
+
                if (mq_push(&peer->p_stage_queue, m) != 0)
                        counters_inc(ifp->if_counters, ifc_oqdrops);
                if (!peer->p_start_onlist) {
                        SLIST_INSERT_HEAD(&start_list, peer, p_start_list);
                        peer->p_start_onlist = 1;
-               }
+               } else
+                       wg_peer_rele(peer);
        }
        SLIST_FOREACH(peer, &start_list, p_start_list) {
                if (noise_remote_ready(&peer->p_remote) == 0) @@ -2119,7 
+2156,9 @@ wg_qstart(struct ifqueue *ifq)
                else
                        wg_timers_event_want_initiation(&peer->p_timers);
                peer->p_start_onlist = 0;
+               wg_peer_rele(peer);
        }
+
        task_add(wg_crypt_taskq, &sc->sc_encap);  }
 
@@ -2169,26 +2208,13 @@ wg_output(struct ifnet *ifp, struct mbuf
                DPRINTF(sc, "No valid endpoint has been configured or "
                                "discovered for peer %llu\n", peer->p_id);
                ret = EDESTADDRREQ;
-               goto error;
+               goto rele;
        }
 
        if (m->m_pkthdr.ph_loopcnt++ > M_MAXLOOP) {
                DPRINTF(sc, "Packet looped\n");
                ret = ELOOP;
-               goto error;
-       }
-
-       /*
-        * As we hold a reference to peer in the mbuf, we can't handle a
-        * delayed packet without doing some refcnting. If a peer is removed
-        * while a delayed holds a reference, bad things will happen. For the
-        * time being, delayed packets are unsupported. This may be fixed with
-        * another aip_lookup in wg_qstart, or refcnting as mentioned before.
-        */
-       if (m->m_pkthdr.pf.delay > 0) {
-               DPRINTF(sc, "PF delay unsupported\n");
-               ret = EOPNOTSUPP;
-               goto error;
+               goto rele;
        }
 
        t->t_peer = peer;
@@ -2196,12 +2222,16 @@ wg_output(struct ifnet *ifp, struct mbuf
        t->t_done = 0;
        t->t_mtu = ifp->if_mtu;
 
+       wg_peer_rele(peer);
+
        /*
         * We still have an issue with ifq that will count a packet that gets
         * dropped in wg_qstart, or not encrypted. These get counted as
         * ofails or oqdrops, so the packet gets counted twice.
         */
        return if_enqueue(ifp, m);
+rele:
+       wg_peer_rele(peer);
 error:
        counters_inc(ifp->if_counters, ifc_oerrors);
        m_freem(m);
Index: sys/sys/refcnt.h
===================================================================
RCS file: /cvs/src/sys/sys/refcnt.h,v
retrieving revision 1.12
diff -u -p -r1.12 refcnt.h
--- sys/sys/refcnt.h    28 Aug 2023 14:50:02 -0000      1.12
+++ sys/sys/refcnt.h    23 Jan 2024 11:42:27 -0000
@@ -51,6 +51,7 @@ unsigned int  refcnt_read(struct refcnt *
 #define DT_REFCNT_IDX_RTENTRY  5
 #define DT_REFCNT_IDX_SYNCACHE 6
 #define DT_REFCNT_IDX_TDB      7
+#define DT_REFCNT_IDX_WGPEER   8
 
 #endif /* _KERNEL */
 

Reply via email to