The branch main has been updated by bz: URL: https://cgit.FreeBSD.org/src/commit/?id=b8dfc3ecf7031a0a7764cdb67d85ebe0c03d5d93
commit b8dfc3ecf7031a0a7764cdb67d85ebe0c03d5d93 Author: Bjoern A. Zeeb <b...@freebsd.org> AuthorDate: 2025-03-06 13:17:19 +0000 Commit: Bjoern A. Zeeb <b...@freebsd.org> CommitDate: 2025-03-06 23:02:08 +0000 LinuxKPI: 802.11: improve key update locking to work around net80211 As indicated in 11db70b6057e there was another panic on key removal which could no longer be reproduced. As originally assumed the problem was "hidden" by commit 9763fec11b83 as mentioned in 11db70b6057e. Said commit had logic inverted and 27bf5c405bf2 fixed that and with that the possible panic came back. The problem exists because some code paths out of net80211 are locked while others are not. This opens a possible race in net80211 which was tracked by extra logging in (*iv_key_update_begin)() (log lines shortend): key_update_begin: tid 100112 vap X nt Y unlocked key_update_begin: tid 100133 vap X nt Y locked One thread can be wpa_supplicant, the other is driven from the driver net80211 taskq. Further LinuxKPI needs to unlock (conditionally in case the lock is held) as a downcall to the driver/FW may sleep. This opens up possibilities for said race even further so that we observe it more reliably. This all leads to one thread calling down into the driver/firmware (unlocked) while the other will get to the same place (after acquiring the wiphy lock) before the nt re-lock happens and thus state checks did not catch this either. For LinuxKPI work around the problem utilizing (*iv_key_update_begin/end)() and taking the wiphy_lock() there holding it over the entire operation. Given we still have to conditionally unlock we need to keep track from _begin to _end on whether we have to re-lock. The checks for this need to be done under the wiphy_lock(). While a bool would suffice we use a refcount to make any future debugging easier. This isn't the most elegant solution but having the wiphy lock covering the full operation allows the 2nd thread to later run through the same code path and find the key gone (which we already checked). It remains questionable if (*iv_key_update_begin/end)() is the correct solution (as there are futher callers covering which would not need the unlock cycle) or if it should be done in the current code. The former will allow us to cover a full key store which we will need in case we will implement suspend/resume beyond what is done in native drivers/net80211 currently, if we will factor out the crypto locking for good, and fix the inconsistent locking of the nt (NODE) lock in net80211. Alternate solutions were discussed on freebsd-wireless today (2025-03-06, in the thread "Re: HEADS UP! Do not update on main currently (panic - on boot)"). Sponsored by: The FreeBSD Foundation MFC after: 3 days X-MFC with: 27bf5c405bf2 Differential Revision: https://reviews.freebsd.org/D49256 --- sys/compat/linuxkpi/common/src/linux_80211.c | 99 +++++++++++++++++++++++----- sys/compat/linuxkpi/common/src/linux_80211.h | 2 + 2 files changed, 85 insertions(+), 16 deletions(-) diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index be0006769e33..ec1798d2e886 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -1135,9 +1135,7 @@ _lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) struct ieee80211_sta *sta; struct ieee80211_node *ni; struct ieee80211_key_conf *kc; - struct ieee80211_node_table *nt; int error; - bool islocked; ic = vap->iv_ic; lhw = ic->ic_softc; @@ -1172,13 +1170,6 @@ _lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) return (1); } - /* This is inconsistent net80211 locking to be fixed one day. */ - nt = &ic->ic_sta; - islocked = IEEE80211_NODE_IS_LOCKED(nt); - if (islocked) - IEEE80211_NODE_UNLOCK(nt); - - wiphy_lock(hw->wiphy); kc = lsta->kc[k->wk_keyix]; /* Re-check under lock. */ if (kc == NULL) { @@ -1210,9 +1201,6 @@ _lkpi_iv_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) free(kc, M_LKPI80211); error = 1; out: - wiphy_unlock(hw->wiphy); - if (islocked) - IEEE80211_NODE_LOCK(nt); ieee80211_free_node(ni); return (error); } @@ -1262,7 +1250,6 @@ _lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) } sta = LSTA_TO_STA(lsta); - wiphy_lock(hw->wiphy); if (lsta->kc[k->wk_keyix] != NULL) { IMPROVE("Still in firmware? Del first. Can we assert this cannot happen?"); ic_printf(ic, "%s: sta %6D found with key information\n", @@ -1283,7 +1270,6 @@ _lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) ic_printf(ic, "%s: CIPHER SUITE %#x (%s) not supported\n", __func__, lcipher, lkpi_cipher_suite_to_name(lcipher)); IMPROVE(); - wiphy_unlock(hw->wiphy); ieee80211_free_node(ni); return (0); } @@ -1324,7 +1310,6 @@ _lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) __func__, SET_KEY, "SET", sta->addr, ":", error); lsta->kc[k->wk_keyix] = NULL; free(kc, M_LKPI80211); - wiphy_unlock(hw->wiphy); ieee80211_free_node(ni); return (0); } @@ -1337,7 +1322,6 @@ _lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) kc, kc->keyidx, kc->hw_key_idx, kc->flags); #endif - wiphy_unlock(hw->wiphy); ieee80211_free_node(ni); return (1); } @@ -1348,6 +1332,86 @@ lkpi_iv_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) return (_lkpi_iv_key_set(vap, k)); } + +static void +lkpi_iv_key_update_begin(struct ieee80211vap *vap) +{ + struct ieee80211_node_table *nt; + struct ieee80211com *ic; + struct lkpi_hw *lhw; + struct ieee80211_hw *hw; + struct lkpi_vif *lvif; + bool islocked; + + ic = vap->iv_ic; + lhw = ic->ic_softc; + hw = LHW_TO_HW(lhw); + lvif = VAP_TO_LVIF(vap); + nt = &ic->ic_sta; + + islocked = IEEE80211_NODE_IS_LOCKED(nt); + +#ifdef LINUXKPI_DEBUG_80211 + if (linuxkpi_debug_80211 & D80211_TRACE_HW_CRYPTO) + ic_printf(vap->iv_ic, "%s: tid %d vap %p nt %p %slocked " + "lvif nt_unlocked %d\n", __func__, curthread->td_tid, + vap, nt, islocked ? "" : "un", lvif->nt_unlocked); +#endif + + /* This is inconsistent net80211 locking to be fixed one day. */ + if (islocked) + IEEE80211_NODE_UNLOCK(nt); + + wiphy_lock(hw->wiphy); + + /* + * nt_unlocked could be a bool given we are under the lock and there + * must only be a single thread. + * In case anything in the future disturbs the order the refcnt will + * help us catching problems a lot easier. + */ + if (islocked) + refcount_acquire(&lvif->nt_unlocked); +} + +static void +lkpi_iv_key_update_end(struct ieee80211vap *vap) +{ + struct ieee80211_node_table *nt; + struct ieee80211com *ic; + struct lkpi_hw *lhw; + struct ieee80211_hw *hw; + struct lkpi_vif *lvif; + bool islocked; + + ic = vap->iv_ic; + lhw = ic->ic_softc; + hw = LHW_TO_HW(lhw); + lvif = VAP_TO_LVIF(vap); + nt = &ic->ic_sta; + + islocked = IEEE80211_NODE_IS_LOCKED(nt); + MPASS(!islocked); + +#ifdef LINUXKPI_DEBUG_80211 + if (linuxkpi_debug_80211 & D80211_TRACE_HW_CRYPTO) + ic_printf(vap->iv_ic, "%s: tid %d vap %p nt %p %slocked " + "lvif nt_unlocked %d\n", __func__, curthread->td_tid, + vap, nt, islocked ? "" : "un", lvif->nt_unlocked); +#endif + + /* + * Check under lock; see comment in lkpi_iv_key_update_begin(). + * In case the refcnt gets out of sync locking in net80211 will + * quickly barf as well (trying to unlock a lock not held). + */ + islocked = refcount_release_if_last(&lvif->nt_unlocked); + wiphy_unlock(hw->wiphy); + + /* This is inconsistent net80211 locking to be fixed one day. */ + if (islocked) + IEEE80211_NODE_LOCK(nt); +} #endif static u_int @@ -3413,6 +3477,7 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], mtx_init(&lvif->mtx, "lvif", NULL, MTX_DEF); INIT_LIST_HEAD(&lvif->lsta_list); lvif->lvif_bss = NULL; + refcount_init(&lvif->nt_unlocked, 0); lvif->lvif_bss_synched = false; vap = LVIF_TO_VAP(lvif); @@ -3545,6 +3610,8 @@ lkpi_ic_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], if (lkpi_hwcrypto && lhw->ops->set_key != NULL) { vap->iv_key_set = lkpi_iv_key_set; vap->iv_key_delete = lkpi_iv_key_delete; + vap->iv_key_update_begin = lkpi_iv_key_update_begin; + vap->iv_key_update_end = lkpi_iv_key_update_end; } #endif diff --git a/sys/compat/linuxkpi/common/src/linux_80211.h b/sys/compat/linuxkpi/common/src/linux_80211.h index b17e6072066c..c2d29b2dcc4b 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.h +++ b/sys/compat/linuxkpi/common/src/linux_80211.h @@ -186,6 +186,8 @@ struct lkpi_vif { struct list_head lsta_list; struct lkpi_sta *lvif_bss; + + int nt_unlocked; /* Count of nt unlocks pending (*mo_set_key) */ bool lvif_bss_synched; bool added_to_drv; /* Driver knows; i.e. we called add_interface(). */