On Tue, May 03, 2022 at 09:31:51PM +0200, Alexander Bluhm wrote:
> On Tue, May 03, 2022 at 07:42:34PM +0200, Moritz Buhl wrote:
> > commit 4b3977248902c22d96aaebdb5784840debc2631c
> > Author: mikeb <mi...@openbsd.org>
> > Date:   Mon Nov 24 13:22:09 2008 +0000
> >
> >     Fix splasserts seen in pr 5987 by propagating a flag that discribes
> >     whether we're called from the interrupt context to the functions
> >     performing allocations.
>
> These days pf was protected with kernel lock and spl. Both are
> released when sleeping. Now we have netlock and pflock. These are
> rwlocks and not released during sleep. So this old race should not
> exist anymore.
>
> > And we are not in interrupt context.
>
> Yes, it is ioctl(2). I think we should always malloc with M_WAITOK
> when in syscall. Otherwise userland would have to cope with randomly
> failing syscalls.
>
> > If this is sound, then the only reason why pfr_destroy_ktable was called
> > is that pool_get is called with PR_NOWAIT. And then the following diff
> > would help.
>
> The code is too complex to be sure what the reason of the syzkaller
> panic is. Sleep in malloc is correct anyway and may improve the
> situation.
>
> Functions with argument values 0 or 1 are hard to read. It would
> be much nicer to pass M_WAITOK or M_NOWAIT. And the variable name
> "intr" does not make sense anymore. pf does not run in interrupt
> context. Call it "mflags" like in pfi_kif_alloc(). Or "wait" like
> in other functions.
>
> Could you cleanup that also?
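For reference, the kind of cleanup asked for above could look roughly like the
sketch below. The prototype and body are hypothetical and trimmed to the
allocation itself (following the pfi_kif_alloc() style); this is only an
illustration, not part of the diff further down.

/* hypothetical sketch, not part of the diff below */
struct pfr_ktable *
pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset,
    int mflags)
{
	struct pfr_ktable	*kt;

	/* the caller states the malloc(9) semantics explicitly */
	kt = pool_get(&pfr_ktable_pl,
	    PR_ZERO | ((mflags & M_NOWAIT) ? PR_NOWAIT : PR_WAITOK));
	if (kt == NULL)
		return (NULL);
	kt->pfrkt_t = *tbl;
	kt->pfrkt_tzero = tzero;
	return (kt);
}

Call sites would then spell out the intent instead of a bare 0/1:

	/* before: the meaning of the last argument is not obvious */
	p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
	    !(flags & PFR_FLAG_USERIOCTL));
	/* after (hypothetical) */
	p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
	    (flags & PFR_FLAG_USERIOCTL) ? M_WAITOK : M_NOWAIT);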
I have a diff which touches the same area. It's a work-in-progress
change: it moves all memory allocations outside of NET_LOCK()/PF_LOCK().
I'm just sending it for your reference now.

I'm not sure flipping a flag is the right change. In general we don't
want to hold NET_LOCK()/PF_LOCK() while waiting for memory (a small
standalone sketch of that pattern is appended after the diff).

regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 8315b115474..1f036e1368f 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -2217,12 +2217,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			goto fail;
 		}
-		NET_LOCK();
-		PF_LOCK();
 		error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
 		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-		PF_UNLOCK();
-		NET_UNLOCK();
 		break;
 	}
 
diff --git a/sys/net/pf_table.c b/sys/net/pf_table.c
index fb23bcabe04..cbaa728b105 100644
--- a/sys/net/pf_table.c
+++ b/sys/net/pf_table.c
@@ -189,6 +189,7 @@ void			 pfr_clstats_ktable(struct pfr_ktable *, time_t, int);
 struct pfr_ktable	*pfr_create_ktable(struct pfr_table *, time_t, int,
 			    int);
 void			 pfr_destroy_ktables(struct pfr_ktableworkq *, int);
+void			 pfr_destroy_ktables_aux(struct pfr_ktableworkq *);
 void			 pfr_destroy_ktable(struct pfr_ktable *, int);
 int			 pfr_ktable_compare(struct pfr_ktable *,
 			    struct pfr_ktable *);
@@ -1493,14 +1494,16 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
 int
 pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
 {
-	struct pfr_ktableworkq	 addq, changeq;
-	struct pfr_ktable	*p, *q, *r, key;
+	struct pfr_ktableworkq	 addq, changeq, auxq;
+	struct pfr_ktable	*p, *q, *r, *n, *w, key;
 	int			 i, rv, xadd = 0;
 	time_t			 tzero = gettime();
 
 	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
 	SLIST_INIT(&addq);
 	SLIST_INIT(&changeq);
+	SLIST_INIT(&auxq);
+	/* pre-allocate all memory outside of locks */
 	for (i = 0; i < size; i++) {
 		YIELD(flags & PFR_FLAG_USERIOCTL);
 		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
@@ -1509,65 +1512,140 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
 		    flags & PFR_FLAG_USERIOCTL))
 			senderr(EINVAL);
 		key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
-		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
-		if (p == NULL) {
-			p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
-			    !(flags & PFR_FLAG_USERIOCTL));
-			if (p == NULL)
-				senderr(ENOMEM);
-			SLIST_FOREACH(q, &addq, pfrkt_workq) {
-				if (!pfr_ktable_compare(p, q)) {
-					pfr_destroy_ktable(p, 0);
-					goto _skip;
-				}
-			}
-			SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
-			xadd++;
-			if (!key.pfrkt_anchor[0])
-				goto _skip;
+		p = pfr_create_ktable(&key.pfrkt_t, tzero, 0,
+		    !(flags & PFR_FLAG_USERIOCTL));
+		if (p == NULL)
+			senderr(ENOMEM);
 
-			/* find or create root table */
-			bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
-			r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
-			if (r != NULL) {
-				p->pfrkt_root = r;
-				goto _skip;
-			}
-			SLIST_FOREACH(q, &addq, pfrkt_workq) {
-				if (!pfr_ktable_compare(&key, q)) {
-					p->pfrkt_root = q;
-					goto _skip;
-				}
+		/*
+		 * Note: we also pre-allocate a root table here. We keep it
+		 * at ->pfrkt_root, which we must not forget about.
+		 */
+		key.pfrkt_flags = 0;
+		memset(key.pfrkt_anchor, 0, sizeof(key.pfrkt_anchor));
+		p->pfrkt_root = pfr_create_ktable(&key.pfrkt_t, 0, 0,
+		    !(flags & PFR_FLAG_USERIOCTL));
+		if (p->pfrkt_root == NULL) {
+			pfr_destroy_ktable(p, 0);
+			senderr(ENOMEM);
+		}
+
+		SLIST_FOREACH(q, &auxq, pfrkt_workq) {
+			if (!pfr_ktable_compare(p, q)) {
+				/*
+				 * We need no lock here, because `p` is empty,
+				 * there are no rules or shadow tables
+				 * attached.
+				 */
+				pfr_destroy_ktable(p->pfrkt_root, 0);
+				p->pfrkt_root = NULL;
+				pfr_destroy_ktable(p, 0);
+				break;
 			}
-			key.pfrkt_flags = 0;
-			r = pfr_create_ktable(&key.pfrkt_t, 0, 1,
-			    !(flags & PFR_FLAG_USERIOCTL));
-			if (r == NULL)
-				senderr(ENOMEM);
-			SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
-			p->pfrkt_root = r;
-		} else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
-			SLIST_FOREACH(q, &changeq, pfrkt_workq)
-				if (!pfr_ktable_compare(&key, q))
-					goto _skip;
+		}
+
+		SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
+	}
+
+	/*
+	 * auxq contains freshly allocated tables with no dups.
+	 * also note there are no rulesets attached, because
+	 * the attach operation requires PF_LOCK().
+	 */
+	NET_LOCK();
+	PF_LOCK();
+	SLIST_FOREACH_SAFE(n, &auxq, pfrkt_workq, w) {
+		p = RB_FIND(pfr_ktablehead, &pfr_ktables, n);
+		if (p == NULL) {
+			SLIST_REMOVE(&auxq, n, pfr_ktable, pfrkt_workq);
+			SLIST_INSERT_HEAD(&addq, n, pfrkt_workq);
+		} else {
 			p->pfrkt_nflags = (p->pfrkt_flags &
 			    ~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
 			SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
-			xadd++;
 		}
-_skip:
-	;
+		xadd++;
+	}
+
+	/*
+	 * addq contains tables we have to insert and attach rules to them
+	 * changeq contains tables we need to update
+	 * auxq contains pre-allocated tables, we won't use and we must free
+	 * them
+	 */
+	SLIST_FOREACH_SAFE(p, &addq, pfrkt_workq, w) {
+		p->pfrkt_rs = pf_find_or_create_ruleset(p->pfrkt_anchor);
+		if (p->pfrkt_rs == NULL) {
+			xadd--;
+			SLIST_REMOVE(&addq, p, pfr_ktable, pfrkt_workq);
+			SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
+			continue;
+		}
+		p->pfrkt_rs->tables++;
+
+		if (!p->pfrkt_anchor[0]) {
+			q = p->pfrkt_root;
+			p->pfrkt_root = NULL;
+			SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+			continue;
+		}
+
+		/* use pre-allocated root table as a key */
+		q = p->pfrkt_root;
+		p->pfrkt_root = NULL;
+		r = RB_FIND(pfr_ktablehead, &pfr_ktables, q);
+		if (r != NULL) {
+			p->pfrkt_root = r;
+			SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+			continue;
+		}
+		/*
+		 * there is a chance we could create root table in earlier
+		 * iteration. such table may exist in addq only then.
+		 */
+		SLIST_FOREACH(r, &addq, pfrkt_workq) {
+			if (!pfr_ktable_compare(r, q)) {
+				/*
+				 * `r` is our root table we've found earlier,
+				 * `q` can get dropped.
+				 */
+				p->pfrkt_root = r;
+				SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+				continue;
+			}
+		}
+		q->pfrkt_rs = pf_find_or_create_ruleset(
+		    q->pfrkt_root->pfrkt_anchor);
+		/*
+		 * root tables are attached to main ruleset,
+		 * because ->pfrkt_anchor[0] == '\0'
+		 */
+		KASSERT(q->pfrkt_rs == &pf_main_ruleset);
+		q->pfrkt_rs->tables++;
+		p->pfrkt_root = q;
+		SLIST_INSERT_HEAD(&addq, q, pfrkt_workq);
 	}
 	if (!(flags & PFR_FLAG_DUMMY)) {
 		pfr_insert_ktables(&addq);
 		pfr_setflags_ktables(&changeq);
 	} else
-		pfr_destroy_ktables(&addq, 0);
+		pfr_destroy_ktables(&addq, 0);
+
+
+	/*
+	 * all pfr_destroy_ktables() must be kept under the lock,
+	 * because of pf_remove_if_empty_ruleset()
+	 */
+	PF_UNLOCK();
+	NET_UNLOCK();
+
+	pfr_destroy_ktables_aux(&auxq);
+
 	if (nadd != NULL)
 		*nadd = xadd;
 	return (0);
 _bad:
-	pfr_destroy_ktables(&addq, 0);
+	pfr_destroy_ktables_aux(&auxq);
 	return (rv);
 }
@@ -2214,6 +2292,7 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset,
 	kt->pfrkt_t = *tbl;
 
 	if (attachruleset) {
+		PF_ASSERT_LOCKED();
 		rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
 		if (!rs) {
 			pfr_destroy_ktable(kt, 0);
@@ -2249,6 +2328,31 @@ pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
 	}
 }
 
+void
+pfr_destroy_ktables_aux(struct pfr_ktableworkq *auxq)
+{
+	struct pfr_ktable	*p;
+
+	while ((p = SLIST_FIRST(auxq)) != NULL) {
+		SLIST_REMOVE_HEAD(auxq, pfrkt_workq);
+		/*
+		 * There must be no extra data (rules, shadow tables, ...)
+		 * attached, because auxq holds just empty memory to be
+		 * initialized. Therefore we can also be called with no lock.
+		 */
+		if (p->pfrkt_root != NULL) {
+			KASSERT(p->pfrkt_root->pfrkt_rs == NULL);
+			KASSERT(p->pfrkt_root->pfrkt_shadow == NULL);
+			KASSERT(p->pfrkt_root->pfrkt_root == NULL);
+			pfr_destroy_ktable(p->pfrkt_root, 0);
+			p->pfrkt_root = NULL;
+		}
+		KASSERT(p->pfrkt_rs == NULL);
+		KASSERT(p->pfrkt_shadow == NULL);
+		pfr_destroy_ktable(p, 0);
+	}
+}
+
 void
 pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
 {
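To make the shape of the change easier to see in isolation, here is a minimal
userland sketch of the same pattern (hypothetical names; a pthread mutex
stands in for NET_LOCK()/PF_LOCK() and the prealloc array plays the role of
auxq): allocate everything up front without the lock, do the lookups and
insertions with the lock held, and free whatever was left unused only after
the lock has been dropped.

#include <stdlib.h>
#include <string.h>
#include <pthread.h>

struct entry {
	char		 name[32];
	struct entry	*next;
};

static struct entry	*table_head;	/* protected by table_mtx */
static pthread_mutex_t	 table_mtx = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
lookup_locked(const char *name)
{
	struct entry	*e;

	for (e = table_head; e != NULL; e = e->next)
		if (strcmp(e->name, name) == 0)
			return (e);
	return (NULL);
}

/* Insert names[0..n-1], skipping names that already exist. */
int
add_entries(const char **names, int n)
{
	struct entry	**prealloc, *e;
	int		 i;

	/* Step 1: pre-allocate everything without holding the lock. */
	if ((prealloc = calloc(n, sizeof(*prealloc))) == NULL)
		return (-1);
	for (i = 0; i < n; i++) {
		if ((prealloc[i] = calloc(1, sizeof(*e))) == NULL)
			goto fail;
		strlcpy(prealloc[i]->name, names[i],
		    sizeof(prealloc[i]->name));
	}

	/* Step 2: only lookups and list insertions under the lock. */
	pthread_mutex_lock(&table_mtx);
	for (i = 0; i < n; i++) {
		if (lookup_locked(names[i]) != NULL)
			continue;	/* duplicate; freed in step 3 */
		e = prealloc[i];
		prealloc[i] = NULL;
		e->next = table_head;
		table_head = e;
	}
	pthread_mutex_unlock(&table_mtx);

	/* Step 3: free unused pre-allocations after dropping the lock. */
	for (i = 0; i < n; i++)
		free(prealloc[i]);
	free(prealloc);
	return (0);

fail:
	while (i-- > 0)
		free(prealloc[i]);
	free(prealloc);
	return (-1);
}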