On Sat, Dec 18, 2021 at 01:07:00AM +0100, Alexander Bluhm wrote:
> Hi,
>
> There are occasions where the walker in tdb_walk() might sleep.
> Case SADB_DUMP is such a case. And mvs@ has a diff that sleeps to
> read the counters. So holding the tdb_sadb_mtx() when calling
> walker() is not allowed.
>
> Move the TDB from the TDB-Hash to a list that is protected by
> netlock. Then unlock tdb_sadb_mtx and traverse the list to call
> walker().
>
> We need less tdb_sadb_mtx dances with that solution. If needed,
> netlock protection can be replaced with another rwlock later.
>
> ok?
>
ok mvs@
> bluhm
>
> Index: net/pfkeyv2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfkeyv2.c,v
> retrieving revision 1.228
> diff -u -p -r1.228 pfkeyv2.c
> --- net/pfkeyv2.c 14 Dec 2021 17:50:37 -0000 1.228
> +++ net/pfkeyv2.c 17 Dec 2021 21:59:00 -0000
> @@ -1045,7 +1045,7 @@ pfkeyv2_sa_flush(struct tdb *tdb, void *
> {
> if (!(*((u_int8_t *) satype_vp)) ||
> tdb->tdb_satype == *((u_int8_t *) satype_vp))
> - tdb_delete_locked(tdb);
> + tdb_delete(tdb);
> return (0);
> }
>
> Index: netinet/ip_ipsp.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 ip_ipsp.c
> --- netinet/ip_ipsp.c 14 Dec 2021 17:50:37 -0000 1.265
> +++ netinet/ip_ipsp.c 17 Dec 2021 22:02:27 -0000
> @@ -90,7 +90,6 @@ void tdb_firstuse(void *);
> void tdb_soft_timeout(void *);
> void tdb_soft_firstuse(void *);
> int tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
> -void tdb_dodelete(struct tdb *, int locked);
>
> int ipsec_in_use = 0;
> u_int64_t ipsec_last_added = 0;
> @@ -627,30 +626,36 @@ tdb_printit(void *addr, int full, int (*
> int
> tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg)
> {
> - int i, rval = 0;
> - struct tdb *tdbp, *next;
> + SIMPLEQ_HEAD(, tdb) tdblist;
> + struct tdb *tdbp;
> + int i, rval;
>
> /*
> - * The walker may aquire the kernel lock. Grab it here to keep
> - * the lock order.
> + * The walker may sleep. So we cannot hold the tdb_sadb_mtx while
> + * traversing the tdb_hnext list. Create a new tdb_walk list with
> + * exclusive netlock protection.
> */
> - KERNEL_LOCK();
> + NET_ASSERT_WLOCKED();
> + SIMPLEQ_INIT(&tdblist);
> +
> mtx_enter(&tdb_sadb_mtx);
> for (i = 0; i <= tdb_hashmask; i++) {
> - for (tdbp = tdbh[i]; rval == 0 && tdbp != NULL; tdbp = next) {
> - next = tdbp->tdb_hnext;
> -
> + for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) {
> if (rdomain != tdbp->tdb_rdomain)
> continue;
> -
> - if (i == tdb_hashmask && next == NULL)
> - rval = walker(tdbp, (void *)arg, 1);
> - else
> - rval = walker(tdbp, (void *)arg, 0);
> + tdb_ref(tdbp);
> + SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk);
> }
> }
> mtx_leave(&tdb_sadb_mtx);
> - KERNEL_UNLOCK();
> +
> + rval = 0;
> + while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) {
> + SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk);
> + if (rval == 0)
> + rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist));
> + tdb_unref(tdbp);
> + }
>
> return rval;
> }
> @@ -764,7 +769,6 @@ tdb_rehash(void)
> return (ENOMEM);
> }
>
> -
> for (i = 0; i <= old_hashmask; i++) {
> for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) {
> tdbnp = tdbp->tdb_hnext;
> @@ -1004,19 +1008,6 @@ tdb_unref(struct tdb *tdb)
> void
> tdb_delete(struct tdb *tdbp)
> {
> - tdb_dodelete(tdbp, 0);
> -}
> -
> -void
> -tdb_delete_locked(struct tdb *tdbp)
> -{
> - MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);
> - tdb_dodelete(tdbp, 1);
> -}
> -
> -void
> -tdb_dodelete(struct tdb *tdbp, int locked)
> -{
> NET_ASSERT_LOCKED();
>
> mtx_enter(&tdbp->tdb_mtx);
> @@ -1026,10 +1017,7 @@ tdb_dodelete(struct tdb *tdbp, int locke
> }
> tdbp->tdb_flags |= TDBF_DELETED;
> mtx_leave(&tdbp->tdb_mtx);
> - if (locked)
> - tdb_unlink_locked(tdbp);
> - else
> - tdb_unlink(tdbp);
> + tdb_unlink(tdbp);
>
> /* cleanup SPD references */
> tdb_cleanspd(tdbp);
> Index: netinet/ip_ipsp.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
> retrieving revision 1.231
> diff -u -p -r1.231 ip_ipsp.h
> --- netinet/ip_ipsp.h 14 Dec 2021 17:50:37 -0000 1.231
> +++ netinet/ip_ipsp.h 17 Dec 2021 21:59:00 -0000
> @@ -335,6 +335,7 @@ struct tdb { /* tunnel
> descriptor blo
> struct tdb *tdb_snext; /* [s] src/sproto table */
> struct tdb *tdb_inext;
> struct tdb *tdb_onext;
> + SIMPLEQ_ENTRY(tdb) tdb_walk; /* [N] temp list for tdb walker */
>
> struct refcnt tdb_refcnt;
> struct mutex tdb_mtx;
> @@ -583,7 +584,6 @@ struct tdb *gettdbbysrcdst_dir(u_int, u_
> void puttdb(struct tdb *);
> void puttdb_locked(struct tdb *);
> void tdb_delete(struct tdb *);
> -void tdb_delete_locked(struct tdb *);
> struct tdb *tdb_alloc(u_int);
> struct tdb *tdb_ref(struct tdb *);
> void tdb_unref(struct tdb *);
>