Module Name:    src
Committed By:   riastradh
Date:           Sun Jul 28 14:40:02 UTC 2024

Modified Files:
        src/sys/net: if_wg.c

Log Message:
wg(4): Fix session destruction.

Schedule destruction as soon as the session is created, to ensure key
erasure within 2*reject-after-time seconds.  Previously, we would
schedule destruction of the previous session 1 second after the next
one has been established.  Combined with a failure to update the
state machine on keepalive packets, this led to temporary deadlock
scenarios.

To keep it simple, there's just one callout which runs every
reject-after-time seconds and erases keys in sessions older than
reject-after-time, so if a session is established the moment after it
runs, the keys might not be erased until (2-eps)*reject-after-time
seconds.

PR kern/55729: net/if_wg/t_misc:wg_rekey test case fails
PR kern/56252: wg(4) state machine has race conditions
PR kern/58463: if_wg does not work when idle.


To generate a diff of this commit:
cvs rdiff -u -r1.99 -r1.100 src/sys/net/if_wg.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/net/if_wg.c
diff -u src/sys/net/if_wg.c:1.99 src/sys/net/if_wg.c:1.100
--- src/sys/net/if_wg.c:1.99	Sun Jul 28 14:39:35 2024
+++ src/sys/net/if_wg.c	Sun Jul 28 14:40:02 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_wg.c,v 1.99 2024/07/28 14:39:35 riastradh Exp $	*/
+/*	$NetBSD: if_wg.c,v 1.100 2024/07/28 14:40:02 riastradh Exp $	*/
 
 /*
  * Copyright (C) Ryota Ozaki <ozaki.ry...@gmail.com>
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_wg.c,v 1.99 2024/07/28 14:39:35 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_wg.c,v 1.100 2024/07/28 14:40:02 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_altq_enabled.h"
@@ -1346,7 +1346,6 @@ wg_put_session_index(struct wg_softc *wg
 	struct wg_peer *wgp __diagused = wgs->wgs_peer;
 
 	KASSERT(mutex_owned(wgp->wgp_lock));
-	KASSERT(wgs == wgp->wgp_session_unstable);
 	KASSERT(wgs->wgs_state != WGS_STATE_UNKNOWN);
 	KASSERT(wgs->wgs_state != WGS_STATE_ESTABLISHED);
 
@@ -1651,7 +1650,6 @@ wg_handle_msg_init(struct wg_softc *wg, 
 		panic("unstable session can't be established");
 	case WGS_STATE_DESTROYING:	/* rekey initiated by peer */
 		WG_TRACE("Session destroying, but force to clear");
-		callout_halt(&wgp->wgp_session_dtor_timer, NULL);
 		wg_put_session_index(wg, wgs);
 		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
 		    wgs->wgs_state);
@@ -1675,6 +1673,15 @@ wg_handle_msg_init(struct wg_softc *wg, 
 	wg_update_endpoint_if_necessary(wgp, src);
 
 	/*
+	 * Count the time of the INIT message as the time of
+	 * establishment -- this is used to decide when to erase keys,
+	 * and we want to start counting as soon as we have generated
+	 * keys.
+	 */
+	wgs->wgs_time_established = time_uptime;
+	wg_schedule_session_dtor_timer(wgp);
+
+	/*
 	 * Respond to the initiator with our ephemeral public key.
 	 */
 	(void)wg_send_handshake_msg_resp(wg, wgp, wgs, wgmi);
@@ -2109,6 +2116,7 @@ wg_handle_msg_resp(struct wg_softc *wg, 
 	KASSERTMSG(wgs->wgs_state == WGS_STATE_INIT_ACTIVE, "state=%d",
 	    wgs->wgs_state);
 	wgs->wgs_time_established = time_uptime;
+	wg_schedule_session_dtor_timer(wgp);
 	wgs->wgs_time_last_data_sent = 0;
 	wgs->wgs_is_initiator = true;
 	WG_DLOG("session[L=%"PRIx32" R=%"PRIx32"]:"
@@ -2179,9 +2187,6 @@ wg_handle_msg_resp(struct wg_softc *wg, 
 		    wgs_prev->wgs_local_index, wgs_prev->wgs_remote_index);
 		atomic_store_relaxed(&wgs_prev->wgs_state,
 		    WGS_STATE_DESTROYING);
-
-		/* We can't destroy the old session immediately */
-		wg_schedule_session_dtor_timer(wgp);
 	} else {
 		KASSERTMSG(wgs_prev->wgs_state == WGS_STATE_UNKNOWN,
 		    "state=%d", wgs_prev->wgs_state);
@@ -2604,6 +2609,7 @@ wg_session_dtor_timer(void *arg)
 
 	WG_TRACE("enter");
 
+	wg_schedule_session_dtor_timer(wgp);
 	wg_schedule_peer_task(wgp, WGP_TASK_DESTROY_PREV_SESSION);
 }
 
@@ -2611,8 +2617,19 @@ static void
 wg_schedule_session_dtor_timer(struct wg_peer *wgp)
 {
 
-	/* 1 second grace period */
-	callout_schedule(&wgp->wgp_session_dtor_timer, hz);
+	/*
+	 * If the periodic session destructor is already pending to
+	 * handle the previous session, that's fine -- leave it in
+	 * place; it will be scheduled again.
+	 */
+	if (callout_pending(&wgp->wgp_session_dtor_timer)) {
+		WG_DLOG("session dtor already pending\n");
+		return;
+	}
+
+	WG_DLOG("scheduling session dtor in %u secs\n", wg_reject_after_time);
+	callout_schedule(&wgp->wgp_session_dtor_timer,
+	    wg_reject_after_time*hz);
 }
 
 static bool
@@ -3205,7 +3222,6 @@ wg_task_establish_session(struct wg_soft
 		/* XXX Can this happen?  */
 		return;
 
-	wgs->wgs_time_established = time_uptime;
 	wgs->wgs_time_last_data_sent = 0;
 	wgs->wgs_is_initiator = false;
 
@@ -3265,9 +3281,6 @@ wg_task_establish_session(struct wg_soft
 		    wgs_prev->wgs_local_index, wgs_prev->wgs_remote_index);
 		atomic_store_relaxed(&wgs_prev->wgs_state,
 		    WGS_STATE_DESTROYING);
-
-		/* We can't destroy the old session immediately */
-		wg_schedule_session_dtor_timer(wgp);
 	} else {
 		KASSERTMSG(wgs_prev->wgs_state == WGS_STATE_UNKNOWN,
 		    "state=%d", wgs_prev->wgs_state);
@@ -3321,15 +3334,61 @@ static void
 wg_task_destroy_prev_session(struct wg_softc *wg, struct wg_peer *wgp)
 {
 	struct wg_session *wgs;
+	time_t age;
 
 	WG_TRACE("WGP_TASK_DESTROY_PREV_SESSION");
 
 	KASSERT(mutex_owned(wgp->wgp_lock));
 
+	/*
+	 * If theres's any previous unstable session, i.e., one that
+	 * was ESTABLISHED and is now DESTROYING, older than
+	 * reject-after-time, destroy it.  Upcoming sessions are still
+	 * in INIT_ACTIVE or INIT_PASSIVE -- we don't touch those here.
+	 */
 	wgs = wgp->wgp_session_unstable;
-	if (wgs->wgs_state == WGS_STATE_DESTROYING) {
+	KASSERT(wgs->wgs_state != WGS_STATE_ESTABLISHED);
+	if (wgs->wgs_state == WGS_STATE_DESTROYING &&
+	    ((age = (time_uptime - wgs->wgs_time_established)) >=
+		wg_reject_after_time)) {
+		WG_DLOG("destroying past session %"PRIuMAX" sec old\n",
+		    (uintmax_t)age);
 		wg_put_session_index(wg, wgs);
+		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
+		    wgs->wgs_state);
+	}
+
+	/*
+	 * If theres's any ESTABLISHED stable session older than
+	 * reject-after-time, destroy it.  (The stable session can also
+	 * be in UNKNOWN state -- nothing to do in that case)
+	 */
+	wgs = wgp->wgp_session_stable;
+	KASSERT(wgs->wgs_state != WGS_STATE_INIT_ACTIVE);
+	KASSERT(wgs->wgs_state != WGS_STATE_INIT_PASSIVE);
+	KASSERT(wgs->wgs_state != WGS_STATE_DESTROYING);
+	if (wgs->wgs_state == WGS_STATE_ESTABLISHED &&
+	    ((age = (time_uptime - wgs->wgs_time_established)) >=
+		wg_reject_after_time)) {
+		WG_DLOG("destroying current session %"PRIuMAX" sec old\n",
+		    (uintmax_t)age);
+		atomic_store_relaxed(&wgs->wgs_state, WGS_STATE_DESTROYING);
+		wg_put_session_index(wg, wgs);
+		KASSERTMSG(wgs->wgs_state == WGS_STATE_UNKNOWN, "state=%d",
+		    wgs->wgs_state);
 	}
+
+	/*
+	 * If there's no sessions left, no need to have the timer run
+	 * until the next time around -- halt it.
+	 *
+	 * It is only ever scheduled with wgp_lock held or in the
+	 * callout itself, and callout_halt prevents rescheudling
+	 * itself, so this never races with rescheduling.
+	 */
+	if (wgp->wgp_session_unstable->wgs_state == WGS_STATE_UNKNOWN &&
+	    wgp->wgp_session_stable->wgs_state == WGS_STATE_UNKNOWN)
+		callout_halt(&wgp->wgp_session_dtor_timer, NULL);
 }
 
 static void

Reply via email to