On Tue, May 03, 2022 at 09:31:51PM +0200, Alexander Bluhm wrote:
> On Tue, May 03, 2022 at 07:42:34PM +0200, Moritz Buhl wrote:
> > commit 4b3977248902c22d96aaebdb5784840debc2631c
> > Author: mikeb <mi...@openbsd.org>
> > Date:   Mon Nov 24 13:22:09 2008 +0000
> > 
> >     Fix splasserts seen in pr 5987 by propagating a flag that discribes
> >     whether we're called from the interrupt context to the functions
> >     performing allocations.
> 
> In those days pf was protected by the kernel lock and spl.  Both are
> released when sleeping.  Now we have the netlock and pflock.  These
> are rwlocks and are not released during sleep.  So this old race
> should not exist anymore.
> 
> > And we are not in interrupt context.
> 
> Yes, it is ioctl(2).  I think we should always malloc with M_WAITOK
> when in a syscall.  Otherwise userland would have to cope with
> randomly failing syscalls.
> 
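For illustration, the malloc(9) semantics at play here: M_WAITOK may
sleep until memory is available and never returns NULL, while M_NOWAIT
must not sleep and so can fail.  A minimal sketch, not from either
diff (M_TEMP is just an example malloc type):

	/* syscall/process context: sleeping is fine, cannot fail */
	buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);

	/* interrupt context: must not sleep, caller handles NULL */
	buf = malloc(len, M_TEMP, M_NOWAIT | M_ZERO);
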
> > If this is sound, then the only reason why pfr_destroy_ktable was called
> > is that pool_get is called with PR_NOWAIT.  And then the following diff
> > would help.
> 
> The code is too complex to be sure what the reason for the syzkaller
> panic is.  Sleeping in malloc is correct anyway and may improve the
> situation.
> 
> Functions with argument values 0 or 1 are hard to read.  It would
> be much nicer to pass M_WAITOK or M_NOWAIT.  And the variable name
> "intr" does not make sense anymore.  pf does not run in interrupt
> context.  Call it "mflags" like in pfi_kif_alloc().  Or "wait" like
> in other functions.
> 
> Could you cleanup that also?
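
For illustration, with explicit flags the pfr_create_ktable() call
sites would read something like the sketch below (assuming
pfr_create_ktable() translates M_WAITOK/M_NOWAIT into the matching
PR_* pool flags internally):

	/* before: what does the negated flag mean for the callee? */
	p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
	    !(flags & PFR_FLAG_USERIOCTL));

	/* after: allocation semantics visible at the call site */
	p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
	    (flags & PFR_FLAG_USERIOCTL) ? M_WAITOK : M_NOWAIT);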

I have a diff which touches the same area. It's a work-in-progress
change that moves all memory allocations outside of NET_LOCK()/PF_LOCK().
I'm just sending it for your reference now.

I'm not sure flipping a flag is the right change. In general we don't
want to hold NET_LOCK()/PF_LOCK() while waiting for memory.
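
Boiled down, the diff follows this pattern (illustrative names, not
the actual pf structures):

	/* 1) pre-allocate outside the locks; sleeping is allowed */
	SLIST_INIT(&auxq);
	for (i = 0; i < n; i++) {
		e = pool_get(&entry_pl, PR_WAITOK | PR_ZERO);
		SLIST_INSERT_HEAD(&auxq, e, e_workq);
	}

	/* 2) under the locks, only link pre-allocated memory in */
	NET_LOCK();
	PF_LOCK();
	/* move what we need from auxq into the live structures */
	PF_UNLOCK();
	NET_UNLOCK();

	/* 3) free the unused leftovers, again without the locks */
	while ((e = SLIST_FIRST(&auxq)) != NULL) {
		SLIST_REMOVE_HEAD(&auxq, e_workq);
		pool_put(&entry_pl, e);
	}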

regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 8315b115474..1f036e1368f 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -2217,12 +2217,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
                        error = ENODEV;
                        goto fail;
                }
-               NET_LOCK();
-               PF_LOCK();
                error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
                    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
-               PF_UNLOCK();
-               NET_UNLOCK();
                break;
        }
 
diff --git a/sys/net/pf_table.c b/sys/net/pf_table.c
index fb23bcabe04..cbaa728b105 100644
--- a/sys/net/pf_table.c
+++ b/sys/net/pf_table.c
@@ -189,6 +189,7 @@ void                         pfr_clstats_ktable(struct pfr_ktable *, time_t, int);
 struct pfr_ktable      *pfr_create_ktable(struct pfr_table *, time_t, int,
                            int);
 void                    pfr_destroy_ktables(struct pfr_ktableworkq *, int);
+void                    pfr_destroy_ktables_aux(struct pfr_ktableworkq *);
 void                    pfr_destroy_ktable(struct pfr_ktable *, int);
 int                     pfr_ktable_compare(struct pfr_ktable *,
                            struct pfr_ktable *);
@@ -1493,14 +1494,16 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
 int
 pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
 {
-       struct pfr_ktableworkq   addq, changeq;
-       struct pfr_ktable       *p, *q, *r, key;
+       struct pfr_ktableworkq   addq, changeq, auxq;
+       struct pfr_ktable       *p, *q, *r, *n, *w, key;
        int                      i, rv, xadd = 0;
        time_t                   tzero = gettime();
 
        ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
        SLIST_INIT(&addq);
        SLIST_INIT(&changeq);
+       SLIST_INIT(&auxq);
+       /* pre-allocate all memory outside of locks */
        for (i = 0; i < size; i++) {
                YIELD(flags & PFR_FLAG_USERIOCTL);
                if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
@@ -1509,65 +1512,142 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
                    flags & PFR_FLAG_USERIOCTL))
                        senderr(EINVAL);
                key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
-               p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
-               if (p == NULL) {
-                       p = pfr_create_ktable(&key.pfrkt_t, tzero, 1,
-                           !(flags & PFR_FLAG_USERIOCTL));
-                       if (p == NULL)
-                               senderr(ENOMEM);
-                       SLIST_FOREACH(q, &addq, pfrkt_workq) {
-                               if (!pfr_ktable_compare(p, q)) {
-                                       pfr_destroy_ktable(p, 0);
-                                       goto _skip;
-                               }
-                       }
-                       SLIST_INSERT_HEAD(&addq, p, pfrkt_workq);
-                       xadd++;
-                       if (!key.pfrkt_anchor[0])
-                               goto _skip;
+               p = pfr_create_ktable(&key.pfrkt_t, tzero, 0,
+                   !(flags & PFR_FLAG_USERIOCTL));
+               if (p == NULL)
+                       senderr(ENOMEM);
 
-                       /* find or create root table */
-                       bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor));
-                       r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
-                       if (r != NULL) {
-                               p->pfrkt_root = r;
-                               goto _skip;
-                       }
-                       SLIST_FOREACH(q, &addq, pfrkt_workq) {
-                               if (!pfr_ktable_compare(&key, q)) {
-                                       p->pfrkt_root = q;
-                                       goto _skip;
-                               }
+               /*
+                * Note: we also pre-allocate a root table here, kept at
+                * ->pfrkt_root; it must be freed if it ends up unused.
+                */
+               key.pfrkt_flags = 0;
+               memset(key.pfrkt_anchor, 0, sizeof(key.pfrkt_anchor));
+               p->pfrkt_root = pfr_create_ktable(&key.pfrkt_t, 0, 0,
+                   !(flags & PFR_FLAG_USERIOCTL));
+               if (p->pfrkt_root == NULL) {
+                       pfr_destroy_ktable(p, 0);
+                       senderr(ENOMEM);
+               }
+
+               SLIST_FOREACH(q, &auxq, pfrkt_workq) {
+                       if (!pfr_ktable_compare(p, q)) {
+                       /*
+                        * We need no lock here, because `p` is empty;
+                        * there are no rules or shadow tables
+                        * attached.
+                        */
+                               pfr_destroy_ktable(p->pfrkt_root, 0);
+                               p->pfrkt_root = NULL;
+                               pfr_destroy_ktable(p, 0);
+                               break;
                        }
-                       key.pfrkt_flags = 0;
-                       r = pfr_create_ktable(&key.pfrkt_t, 0, 1,
-                           !(flags & PFR_FLAG_USERIOCTL));
-                       if (r == NULL)
-                               senderr(ENOMEM);
-                       SLIST_INSERT_HEAD(&addq, r, pfrkt_workq);
-                       p->pfrkt_root = r;
-               } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
-                       SLIST_FOREACH(q, &changeq, pfrkt_workq)
-                               if (!pfr_ktable_compare(&key, q))
-                                       goto _skip;
+               }
+
+               SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
+       }
+
+       /*
+        * auxq now contains freshly allocated tables with no duplicates.
+        * Also note there are no rulesets attached, because the attach
+        * operation requires the PF_LOCK().
+        */
+       NET_LOCK();
+       PF_LOCK();
+       SLIST_FOREACH_SAFE(n, &auxq, pfrkt_workq, w) {
+               p = RB_FIND(pfr_ktablehead, &pfr_ktables, n);
+               if (p == NULL) {
+                       SLIST_REMOVE(&auxq, n, pfr_ktable, pfrkt_workq);
+                       SLIST_INSERT_HEAD(&addq, n, pfrkt_workq);
+               } else {
-                       p->pfrkt_nflags = (p->pfrkt_flags &
-                           ~PFR_TFLAG_USRMASK) | key.pfrkt_flags;
+                       p->pfrkt_nflags = (p->pfrkt_flags &
+                           ~PFR_TFLAG_USRMASK) | n->pfrkt_flags;
                        SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
-                       xadd++;
                }
-_skip:
-       ;
+               xadd++;
+       }
+
+       /*
+        * addq contains tables we have to insert and attach rules to,
+        * changeq contains tables we need to update, and auxq
+        * contains pre-allocated tables we won't use and therefore
+        * must free.
+        */
+       SLIST_FOREACH_SAFE(p, &addq, pfrkt_workq, w) {
+               p->pfrkt_rs = pf_find_or_create_ruleset(p->pfrkt_anchor);
+               if (p->pfrkt_rs == NULL) {
+                       xadd--;
+                       SLIST_REMOVE(&addq, p, pfr_ktable, pfrkt_workq);
+                       SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
+                       continue;
+               }
+               p->pfrkt_rs->tables++;
+
+               if (!p->pfrkt_anchor[0]) {
+                       q = p->pfrkt_root;
+                       p->pfrkt_root = NULL;
+                       SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+                       continue;
+               }
+
+               /* use pre-allocated root table as a key */
+               q = p->pfrkt_root;
+               p->pfrkt_root = NULL;
+               r = RB_FIND(pfr_ktablehead, &pfr_ktables, q);
+               if (r != NULL) {
+                       p->pfrkt_root = r;
+                       SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+                       continue;
+               }
+               /*
+                * There is a chance we created the root table in an
+                * earlier iteration; such a table can then only be in addq.
+                */
+               SLIST_FOREACH(r, &addq, pfrkt_workq) {
+                       if (!pfr_ktable_compare(r, q)) {
+                               /*
+                                * `r` is our root table we've found
+                                * earlier, `q` can get dropped.
+                                */
+                               p->pfrkt_root = r;
+                               SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
+                               break;
+                       }
+               }
+               if (r != NULL)
+                       continue;
+               q->pfrkt_rs = pf_find_or_create_ruleset(
+                   q->pfrkt_anchor);
+               /*
+                * Root tables are attached to the main ruleset,
+                * because their ->pfrkt_anchor[0] == '\0'.
+                */
+               KASSERT(q->pfrkt_rs == &pf_main_ruleset);
+               q->pfrkt_rs->tables++;
+               p->pfrkt_root = q;
+               SLIST_INSERT_HEAD(&addq, q, pfrkt_workq);
        }
        if (!(flags & PFR_FLAG_DUMMY)) {
                pfr_insert_ktables(&addq);
                pfr_setflags_ktables(&changeq);
        } else
-                pfr_destroy_ktables(&addq, 0);
+               pfr_destroy_ktables(&addq, 0);
+
+
+       /*
+        * All pfr_destroy_ktables() calls must stay under the lock,
+        * because they may call pf_remove_if_empty_ruleset().
+        */
+       PF_UNLOCK();
+       NET_UNLOCK();
+
+       pfr_destroy_ktables_aux(&auxq);
+
        if (nadd != NULL)
                *nadd = xadd;
        return (0);
 _bad:
-       pfr_destroy_ktables(&addq, 0);
+       pfr_destroy_ktables_aux(&auxq);
        return (rv);
 }
 
@@ -2214,6 +2292,7 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset,
        kt->pfrkt_t = *tbl;
 
        if (attachruleset) {
+               PF_ASSERT_LOCKED();
                rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
                if (!rs) {
                        pfr_destroy_ktable(kt, 0);
@@ -2249,6 +2328,31 @@ pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
        }
 }
 
+void
+pfr_destroy_ktables_aux(struct pfr_ktableworkq *auxq)
+{
+       struct pfr_ktable       *p;
+
+       while ((p = SLIST_FIRST(auxq)) != NULL) {
+               SLIST_REMOVE_HEAD(auxq, pfrkt_workq);
+               /*
+                * There must be no extra data (rules, shadow tables, ...)
+                * attached, because auxq holds only pre-allocated, empty
+                * tables. Therefore we can also be called with no lock.
+                */
+               if (p->pfrkt_root != NULL) {
+                       KASSERT(p->pfrkt_root->pfrkt_rs == NULL);
+                       KASSERT(p->pfrkt_root->pfrkt_shadow == NULL);
+                       KASSERT(p->pfrkt_root->pfrkt_root == NULL);
+                       pfr_destroy_ktable(p->pfrkt_root, 0);
+                       p->pfrkt_root = NULL;
+               }
+               KASSERT(p->pfrkt_rs == NULL);
+               KASSERT(p->pfrkt_shadow == NULL);
+               pfr_destroy_ktable(p, 0);
+       }
+}
+
 void
 pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
 {
