Module Name: src Committed By: riastradh Date: Sun Jul 28 14:37:59 UTC 2024
Modified Files: src/sys/net: if_wg.c Log Message: wg(4): Rework some details of internal session state machine. This way: - There is a clear transition between when a session is being set up, and when it is exposed to the data rx path (wg_handle_msg_data): atomic_store_release to set wgs->wgs_state to INIT_PASSIVE or ESTABLISHED. (The transition INIT_PASSIVE -> ESTABLISHED is immaterial to the data rx path, so that's just atomic_store_relaxed. Similarly the transition to DESTROYING.) - There is a clear transition between when a session is being set up, and when it is exposed to the data tx path (wg_output): atomic_store_release to set wgp->wgp_session_stable to it. - Every path that reinitializes a session must go through wg_destroy_session via wg_put_index_session first. This avoids races between session reuse and the data rx/tx paths. - Add a log message at the time of every state transition. Prompted by: PR kern/55729: net/if_wg/t_misc:wg_rekey test case fails PR kern/56252: wg(4) state machine has race conditions PR kern/58463: if_wg does not work when idle. To generate a diff of this commit: cvs rdiff -u -r1.93 -r1.94 src/sys/net/if_wg.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/net/if_wg.c diff -u src/sys/net/if_wg.c:1.93 src/sys/net/if_wg.c:1.94 --- src/sys/net/if_wg.c:1.93 Sat Jul 27 15:45:20 2024 +++ src/sys/net/if_wg.c Sun Jul 28 14:37:59 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: if_wg.c,v 1.93 2024/07/27 15:45:20 christos Exp $ */ +/* $NetBSD: if_wg.c,v 1.94 2024/07/28 14:37:59 riastradh Exp $ */ /* * Copyright (C) Ryota Ozaki <ozaki.ry...@gmail.com> @@ -41,7 +41,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: if_wg.c,v 1.93 2024/07/27 15:45:20 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_wg.c,v 1.94 2024/07/28 14:37:59 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_altq_enabled.h" @@ -1282,8 +1282,16 @@ wg_destroy_session(struct wg_softc *wg, pserialize_perform(wgp->wgp_psz); psref_target_destroy(&wgs->wgs_psref, wg_psref_class); - /* Free memory, zero state, and transition to UNKNOWN. */ + /* + * Free memory, zero state, and transition to UNKNOWN. We have + * exclusive access to the session now, so there is no need for + * an atomic store. + */ thmap_gc(wg->wg_sessions_byindex, garbage); + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"] -> WGS_STATE_UNKNOWN\n", + wgs->wgs_local_index, wgs->wgs_remote_index); + wgs->wgs_local_index = 0; + wgs->wgs_remote_index = 0; wg_clear_states(wgs); wgs->wgs_state = WGS_STATE_UNKNOWN; } @@ -1306,7 +1314,8 @@ wg_get_session_index(struct wg_softc *wg KASSERT(mutex_owned(wgp->wgp_lock)); KASSERT(wgs == wgp->wgp_session_unstable); - KASSERT(wgs->wgs_state == WGS_STATE_UNKNOWN); + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); do { /* Pick a uniform random index. */ @@ -1379,7 +1388,8 @@ wg_fill_msg_init(struct wg_softc *wg, st KASSERT(mutex_owned(wgp->wgp_lock)); KASSERT(wgs == wgp->wgp_session_unstable); - KASSERT(wgs->wgs_state == WGS_STATE_INIT_ACTIVE); + KASSERTMSG(wgs->wgs_state == WGS_STATE_INIT_ACTIVE, "state=%d", + wgs->wgs_state); wgmi->wgmi_type = htole32(WG_MSG_TYPE_INIT); wgmi->wgmi_sender = wgs->wgs_local_index; @@ -1622,29 +1632,40 @@ wg_handle_msg_init(struct wg_softc *wg, wgs = wgp->wgp_session_unstable; switch (wgs->wgs_state) { case WGS_STATE_UNKNOWN: /* new session initiated by peer */ - wg_get_session_index(wg, wgs); break; case WGS_STATE_INIT_ACTIVE: /* we're already initiating, drop */ + /* XXX Who wins if both sides send INIT? */ WG_TRACE("Session already initializing, ignoring the message"); goto out; case WGS_STATE_INIT_PASSIVE: /* peer is retrying, start over */ WG_TRACE("Session already initializing, destroying old states"); - wg_clear_states(wgs); - /* keep session index */ + /* + * XXX Avoid this -- just resend our response -- if the + * INIT message is identical to the previous one. + */ + wg_put_session_index(wg, wgs); + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); break; case WGS_STATE_ESTABLISHED: /* can't happen */ panic("unstable session can't be established"); - break; case WGS_STATE_DESTROYING: /* rekey initiated by peer */ WG_TRACE("Session destroying, but force to clear"); callout_stop(&wgp->wgp_session_dtor_timer); - wg_clear_states(wgs); - /* keep session index */ + wg_put_session_index(wg, wgs); + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); break; default: panic("invalid session state: %d", wgs->wgs_state); } - wgs->wgs_state = WGS_STATE_INIT_PASSIVE; + + /* + * Assign a fresh session index. + */ + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); + wg_get_session_index(wg, wgs); memcpy(wgs->wgs_handshake_hash, hash, sizeof(hash)); memcpy(wgs->wgs_chaining_key, ckey, sizeof(ckey)); @@ -1653,11 +1674,36 @@ wg_handle_msg_init(struct wg_softc *wg, wg_update_endpoint_if_necessary(wgp, src); + /* + * Respond to the initiator with our ephemeral public key. + */ (void)wg_send_handshake_msg_resp(wg, wgp, wgs, wgmi); + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]:" + " calculate keys as responder\n", + wgs->wgs_local_index, wgs->wgs_remote_index); wg_calculate_keys(wgs, false); wg_clear_states(wgs); + /* + * Session is ready to receive data now that we have received + * the peer initiator's ephemeral key pair, generated our + * responder's ephemeral key pair, and derived a session key. + * + * Transition from UNKNOWN to INIT_PASSIVE to publish it to the + * data rx path, wg_handle_msg_data, where the + * atomic_load_acquire matching this atomic_store_release + * happens. + * + * (Session is not, however, ready to send data until the peer + * has acknowledged our response by sending its first data + * packet. So don't swap the sessions yet.) + */ + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"] -> WGS_STATE_INIT_PASSIVE\n", + wgs->wgs_local_index, wgs->wgs_remote_index); + atomic_store_release(&wgs->wgs_state, WGS_STATE_INIT_PASSIVE); + WG_TRACE("WGS_STATE_INIT_PASSIVE"); + out: mutex_exit(wgp->wgp_lock); wg_put_peer(wgp, &psref_peer); @@ -1739,25 +1785,41 @@ wg_send_handshake_msg_init(struct wg_sof /* XXX pull dispatch out into wg_task_send_init_message */ switch (wgs->wgs_state) { case WGS_STATE_UNKNOWN: /* new session initiated by us */ - wg_get_session_index(wg, wgs); break; case WGS_STATE_INIT_ACTIVE: /* we're already initiating, stop */ WG_TRACE("Session already initializing, skip starting new one"); return EBUSY; case WGS_STATE_INIT_PASSIVE: /* peer was trying -- XXX what now? */ - WG_TRACE("Session already initializing, destroying old states"); - wg_clear_states(wgs); - /* keep session index */ - break; + WG_TRACE("Session already initializing, waiting for peer"); + return EBUSY; case WGS_STATE_ESTABLISHED: /* can't happen */ panic("unstable session can't be established"); - break; case WGS_STATE_DESTROYING: /* rekey initiated by us too early */ WG_TRACE("Session destroying"); - /* XXX should wait? */ - return EBUSY; + wg_put_session_index(wg, wgs); + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); + break; } - wgs->wgs_state = WGS_STATE_INIT_ACTIVE; + + /* + * Assign a fresh session index. + */ + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); + wg_get_session_index(wg, wgs); + + /* + * We have initiated a session. Transition to INIT_ACTIVE. + * This doesn't publish it for use in the data rx path, + * wg_handle_msg_data, or in the data tx path, wg_output -- we + * have to wait for the peer to respond with their ephemeral + * public key before we can derive a session key for tx/rx. + * Hence only atomic_store_relaxed. + */ + WG_DLOG("session[L=%"PRIx32" R=(unknown)] -> WGS_STATE_INIT_ACTIVE\n", + wgs->wgs_local_index); + atomic_store_relaxed(&wgs->wgs_state, WGS_STATE_INIT_ACTIVE); m = m_gethdr(M_WAIT, MT_DATA); if (sizeof(*wgmi) > MHLEN) { @@ -1799,7 +1861,8 @@ wg_fill_msg_resp(struct wg_softc *wg, st KASSERT(mutex_owned(wgp->wgp_lock)); KASSERT(wgs == wgp->wgp_session_unstable); - KASSERT(wgs->wgs_state == WGS_STATE_INIT_PASSIVE); + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); memcpy(hash, wgs->wgs_handshake_hash, sizeof(hash)); memcpy(ckey, wgs->wgs_chaining_key, sizeof(ckey)); @@ -1889,11 +1952,13 @@ wg_swap_sessions(struct wg_peer *wgp) KASSERT(mutex_owned(wgp->wgp_lock)); wgs = wgp->wgp_session_unstable; - KASSERT(wgs->wgs_state == WGS_STATE_ESTABLISHED); + KASSERTMSG(wgs->wgs_state == WGS_STATE_ESTABLISHED, "state=%d", + wgs->wgs_state); wgs_prev = wgp->wgp_session_stable; - KASSERT(wgs_prev->wgs_state == WGS_STATE_ESTABLISHED || - wgs_prev->wgs_state == WGS_STATE_UNKNOWN); + KASSERTMSG((wgs_prev->wgs_state == WGS_STATE_ESTABLISHED || + wgs_prev->wgs_state == WGS_STATE_UNKNOWN), + "state=%d", wgs_prev->wgs_state); atomic_store_release(&wgp->wgp_session_stable, wgs); wgp->wgp_session_unstable = wgs_prev; } @@ -2041,17 +2106,38 @@ wg_handle_msg_resp(struct wg_softc *wg, wgs->wgs_remote_index = wgmr->wgmr_sender; WG_DLOG("receiver=%x\n", wgs->wgs_remote_index); - KASSERT(wgs->wgs_state == WGS_STATE_INIT_ACTIVE); - wgs->wgs_state = WGS_STATE_ESTABLISHED; + KASSERTMSG(wgs->wgs_state == WGS_STATE_INIT_ACTIVE, "state=%d", + wgs->wgs_state); wgs->wgs_time_established = time_uptime; wgs->wgs_time_last_data_sent = 0; wgs->wgs_is_initiator = true; + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]:" + " calculate keys as initiator\n", + wgs->wgs_local_index, wgs->wgs_remote_index); wg_calculate_keys(wgs, true); wg_clear_states(wgs); + + /* + * Session is ready to receive data now that we have received + * the responder's response. + * + * Transition from INIT_ACTIVE to ESTABLISHED to publish it to + * the data rx path, wg_handle_msg_data. + */ + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32" -> WGS_STATE_ESTABLISHED\n", + wgs->wgs_local_index, wgs->wgs_remote_index); + atomic_store_release(&wgs->wgs_state, WGS_STATE_ESTABLISHED); WG_TRACE("WGS_STATE_ESTABLISHED"); callout_stop(&wgp->wgp_handshake_timeout_timer); + /* + * Session is ready to send data now that we have received the + * responder's response. + * + * Swap the sessions to publish the new one as the stable + * session for the data tx path, wg_output. + */ wg_swap_sessions(wgp); KASSERT(wgs == wgp->wgp_session_stable); wgs_prev = wgp->wgp_session_unstable; @@ -2087,8 +2173,17 @@ wg_handle_msg_resp(struct wg_softc *wg, /* Wait for wg_get_stable_session to drain. */ pserialize_perform(wgp->wgp_psz); - /* Transition ESTABLISHED->DESTROYING. */ - wgs_prev->wgs_state = WGS_STATE_DESTROYING; + /* + * Transition ESTABLISHED->DESTROYING. The session + * will remain usable for the data rx path to process + * packets still in flight to us, but we won't use it + * for data tx. + */ + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]" + " -> WGS_STATE_DESTROYING\n", + wgs_prev->wgs_local_index, wgs_prev->wgs_remote_index); + atomic_store_relaxed(&wgs_prev->wgs_state, + WGS_STATE_DESTROYING); /* We can't destroy the old session immediately */ wg_schedule_session_dtor_timer(wgp); @@ -2112,7 +2207,8 @@ wg_send_handshake_msg_resp(struct wg_sof KASSERT(mutex_owned(wgp->wgp_lock)); KASSERT(wgs == wgp->wgp_session_unstable); - KASSERT(wgs->wgs_state == WGS_STATE_INIT_PASSIVE); + KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d", + wgs->wgs_state); m = m_gethdr(M_WAIT, MT_DATA); if (sizeof(*wgmr) > MHLEN) { @@ -2329,8 +2425,11 @@ wg_lookup_session_by_index(struct wg_sof int s = pserialize_read_enter(); wgs = thmap_get(wg->wg_sessions_byindex, &index, sizeof index); if (wgs != NULL) { - KASSERT(atomic_load_relaxed(&wgs->wgs_state) != - WGS_STATE_UNKNOWN); + uint32_t oindex __diagused = + atomic_load_relaxed(&wgs->wgs_local_index); + KASSERTMSG(index == oindex, + "index=%"PRIx32" wgs->wgs_local_index=%"PRIx32, + index, oindex); psref_acquire(psref, &wgs->wgs_psref, wg_psref_class); } pserialize_read_exit(s); @@ -2612,12 +2711,15 @@ wg_handle_msg_data(struct wg_softc *wg, * We are only ready to handle data when in INIT_PASSIVE, * ESTABLISHED, or DESTROYING. All transitions out of that * state dissociate the session index and drain psrefs. + * + * atomic_load_acquire matches atomic_store_release in either + * wg_handle_msg_init or wg_handle_msg_resp. (The transition + * INIT_PASSIVE to ESTABLISHED in wg_task_establish_session + * doesn't make a difference for this rx path.) */ - state = atomic_load_relaxed(&wgs->wgs_state); + state = atomic_load_acquire(&wgs->wgs_state); switch (state) { case WGS_STATE_UNKNOWN: - panic("wg session %p in unknown state has session index %u", - wgs, wgmd->wgmd_receiver); case WGS_STATE_INIT_ACTIVE: WG_TRACE("not yet ready for data"); goto out; @@ -2853,6 +2955,15 @@ wg_handle_msg_cookie(struct wg_softc *wg goto out; } + /* + * wgp_last_sent_mac1_valid is only set to true when we are + * transitioning to INIT_ACTIVE or INIT_PASSIVE, and always + * cleared on transition out of them. + */ + KASSERTMSG((wgs->wgs_state == WGS_STATE_INIT_ACTIVE || + wgs->wgs_state == WGS_STATE_INIT_PASSIVE), + "state=%d", wgs->wgs_state); + /* Decrypt the cookie and store it for later handshake retry. */ wg_algo_mac_cookie(key, sizeof(key), wgp->wgp_pubkey, sizeof(wgp->wgp_pubkey)); @@ -3099,12 +3210,33 @@ wg_task_establish_session(struct wg_soft /* XXX Can this happen? */ return; - wgs->wgs_state = WGS_STATE_ESTABLISHED; wgs->wgs_time_established = time_uptime; wgs->wgs_time_last_data_sent = 0; wgs->wgs_is_initiator = false; + + /* + * Session was already ready to receive data. Transition from + * INIT_PASSIVE to ESTABLISHED just so we can swap the + * sessions. + * + * atomic_store_relaxed because this doesn't affect the data rx + * path, wg_handle_msg_data -- changing from INIT_PASSIVE to + * ESTABLISHED makes no difference to the data rx path, and the + * transition to INIT_PASSIVE with store-release already + * published the state needed by the data rx path. + */ + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"] -> WGS_STATE_ESTABLISHED\n", + wgs->wgs_local_index, wgs->wgs_remote_index); + atomic_store_relaxed(&wgs->wgs_state, WGS_STATE_ESTABLISHED); WG_TRACE("WGS_STATE_ESTABLISHED"); + /* + * Session is ready to send data too now that we have received + * the peer initiator's first data packet. + * + * Swap the sessions to publish the new one as the stable + * session for the data tx path, wg_output. + */ wg_swap_sessions(wgp); KASSERT(wgs == wgp->wgp_session_stable); wgs_prev = wgp->wgp_session_unstable; @@ -3130,15 +3262,29 @@ wg_task_establish_session(struct wg_soft /* Wait for wg_get_stable_session to drain. */ pserialize_perform(wgp->wgp_psz); - /* Transition ESTABLISHED->DESTROYING. */ - wgs_prev->wgs_state = WGS_STATE_DESTROYING; + /* + * Transition ESTABLISHED->DESTROYING. The session + * will remain usable for the data rx path to process + * packets still in flight to us, but we won't use it + * for data tx. + */ + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]" + " -> WGS_STATE_DESTROYING\n", + wgs_prev->wgs_local_index, wgs_prev->wgs_remote_index); + atomic_store_relaxed(&wgs_prev->wgs_state, + WGS_STATE_DESTROYING); /* We can't destroy the old session immediately */ wg_schedule_session_dtor_timer(wgp); } else { KASSERTMSG(wgs_prev->wgs_state == WGS_STATE_UNKNOWN, "state=%d", wgs_prev->wgs_state); - wg_clear_states(wgs_prev); + WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]" + " -> WGS_STATE_UNKNOWN\n", + wgs_prev->wgs_local_index, wgs_prev->wgs_remote_index); + wgs_prev->wgs_local_index = 0; /* paranoia */ + wgs_prev->wgs_remote_index = 0; /* paranoia */ + wg_clear_states(wgs_prev); /* paranoia */ wgs_prev->wgs_state = WGS_STATE_UNKNOWN; } } @@ -3326,9 +3472,10 @@ wg_overudp_cb(struct mbuf **mp, int offs WG_DLOG("type=%d\n", le32toh(wgm.wgm_type)); /* - * Handle DATA packets promptly as they arrive. Other packets - * may require expensive public-key crypto and are not as - * sensitive to latency, so defer them to the worker thread. + * Handle DATA packets promptly as they arrive, if they are in + * an active session. Other packets may require expensive + * public-key crypto and are not as sensitive to latency, so + * defer them to the worker thread. */ switch (le32toh(wgm.wgm_type)) { case WG_MSG_TYPE_DATA: