On Wed, Jul 07, 2021 at 10:01:59PM +0200, Hrvoje Popovski wrote:
> On 7.7.2021. 19:38, Vitaliy Makkoveev wrote:
> > Hi,
> >
> > It seems the first the first panic occured because ipsp_spd_lookup()
> > modifies tdbp->tdb_policy_head and simultaneous execution breaks it.
> > I guess at least mutex(9) should be used to protect `tdb_policy_head'.
> >
> > The second panic occured because ipsp_acquire_sa() does
> > `ipsec_acquire_pool' initialization in runtime so parallel execution
> > breaks it. It's easy to fix.
> >
> > Could you try the diff below? It moves `ipsec_acquire_pool'
> > initialization to pfkey_init() just after `ipsec_policy_pool'
> > initialization. This should fix the second panic.
>
>
> Hi,
>
> with this diff i manage to get this panics ..
>
> r620-1# uvm_fault(0xfffffd842f83e170, 0x147, 0, 2) -> e
> kernel: page fault trap, code=0
> Stopped at tdb_free+0x9c: movq %rsi,0(%rdi)
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> * 38448 89106 68 0x10 0 1K isakmpd
> tdb_free(ffff80000131e688) at tdb_free+0x9c
> pfkeyv2_send(fffffd83b2512d30,ffff8000012f2680,50) at pfkeyv2_send+0x1191
> pfkeyv2_output(fffffd80a4b37400,fffffd83b2512d30,0,0) at pfkeyv2_output+0x8a
> pfkeyv2_usrreq(fffffd83b2512d30,9,fffffd80a4b37400,0,0,ffff800023908d20)
> at pfkeyv2_usrreq+0x1b0
> sosend(fffffd83b2512d30,0,ffff800023959130,0,0,0) at sosend+0x3a9
> dofilewritev(ffff800023908d20,7,ffff800023959130,0,ffff800023959230) at
> dofilewritev+0x14d
> sys_writev(ffff800023908d20,ffff8000239591d0,ffff800023959230) at
> sys_writev+0xe2
> syscall(ffff8000239592a0) at syscall+0x3b9
> Xsyscall() at Xsyscall+0x128
> end of kernel
> end trace frame: 0x7f7ffffb27c0, count: 6
> https://www.openbsd.org/ddb.html describes the minimum info required in
> bug reports. Insufficient info makes it difficult to find and fix bugs.
> ddb{1}>
>
>
>
> r620-1# uvm_fault(0xffffffff82379d80, 0x147, 0, 2) -> e
> kernel: page fault trap, code=0
> Stopped at ipsp_spd_lookup+0x9a4: movq %rax,0(%rcx)
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> 434446 37326 0 0x14000 0x200 4 softnet
> 405134 41722 0 0x14000 0x200 1 softnet
> 106389 88948 0 0x14000 0x200 3 softnet
> *262295 93659 0 0x14000 0x200 2 softnet
> ipsp_spd_lookup(fffffd80a44f6f00,2,14,ffff8000238651bc,2,0) at
> ipsp_spd_lookup+0x9a4
> ip_output_ipsec_lookup(fffffd80a44f6f00,14,ffff8000238651bc,0,0) at
> ip_output_ipsec_lookup+0x4d
> ip_output(fffffd80a44f6f00,0,ffff800023865348,1,0,0) at ip_output+0x42a
> ip_forward(fffffd80a44f6f00,ffff800000087048,fffffd83b3ea6d98,0) at
> ip_forward+0x26a
> ip_input_if(ffff800023865488,ffff800023865494,4,0,ffff800000087048) at
> ip_input_if+0x365
> ipv4_input(ffff800000087048,fffffd80a44f6f00) at ipv4_input+0x39
> if_input_process(ffff800000087048,ffff800023865508) at if_input_process+0x6f
> ifiq_process(ffff800000086c00) at ifiq_process+0x69
> taskq_thread(ffff800000030000) at taskq_thread+0x9f
> end trace frame: 0x0, count: 6
> https://www.openbsd.org/ddb.html describes the minimum info required in
> bug reports. Insufficient info makes it difficult to find and fix bugs.
>
Thanks. ipsp_spd_lookup() stopped panic in pool_get(9).
I guess the panics continue because simultaneous modifications of
'tdbp->tdb_policy_head' break it. Could you try the diff below? It
introduces `tdb_polhd_mtx' mutex(9) and uses it to protect
'tdbp->tdb_policy_head' modifications. I don't propose this diff for
commit but to check my suggestion.
Index: sys/net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.216
diff -u -p -r1.216 pfkeyv2.c
--- sys/net/pfkeyv2.c 5 Jul 2021 12:01:20 -0000 1.216
+++ sys/net/pfkeyv2.c 7 Jul 2021 20:21:44 -0000
@@ -2000,9 +2000,12 @@ pfkeyv2_send(struct socket *so, void *me
(caddr_t)&ipo->ipo_mask, rnh,
ipo->ipo_nodes, 0)) == NULL) {
/* Remove from linked list of policies on TDB */
- if (ipo->ipo_tdb)
+ if (ipo->ipo_tdb) {
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
ipo, ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
+ }
if (ipo->ipo_ids)
ipsp_ids_free(ipo->ipo_ids);
Index: sys/netinet/ip_ipsp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.238
diff -u -p -r1.238 ip_ipsp.c
--- sys/netinet/ip_ipsp.c 10 Mar 2021 10:21:49 -0000 1.238
+++ sys/netinet/ip_ipsp.c 7 Jul 2021 20:21:44 -0000
@@ -92,6 +92,7 @@ u_int64_t ipsec_last_added = 0;
int ipsec_ids_idle = 100; /* keep free ids for 100s */
struct pool tdb_pool;
+struct mutex tdb_polhd_mtx=MUTEX_INITIALIZER(IPL_SOFTNET);
/* Protected by the NET_LOCK(). */
u_int32_t ipsec_ids_next_flow = 1; /* may not be zero */
@@ -846,12 +847,14 @@ tdb_free(struct tdb *tdbp)
#endif
/* Cleanup SPD references. */
+ mtx_enter(&tdb_polhd_mtx);
for (ipo = TAILQ_FIRST(&tdbp->tdb_policy_head); ipo;
ipo = TAILQ_FIRST(&tdbp->tdb_policy_head)) {
TAILQ_REMOVE(&tdbp->tdb_policy_head, ipo, ipo_tdb_next);
ipo->ipo_tdb = NULL;
ipo->ipo_last_searched = 0; /* Force a re-search. */
}
+ mtx_leave(&tdb_polhd_mtx);
if (tdbp->tdb_ids) {
ipsp_ids_free(tdbp->tdb_ids);
Index: sys/netinet/ip_ipsp.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.197
diff -u -p -r1.197 ip_ipsp.h
--- sys/netinet/ip_ipsp.h 4 May 2021 09:28:04 -0000 1.197
+++ sys/netinet/ip_ipsp.h 7 Jul 2021 20:21:44 -0000
@@ -480,6 +480,8 @@ struct xformsw {
int, int); /* output */
};
+extern struct mutex tdb_polhd_mtx;
+
extern int ipsec_in_use;
extern u_int64_t ipsec_last_added;
extern int encdebug; /* enable message reporting */
Index: sys/netinet/ip_spd.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_spd.c,v
retrieving revision 1.103
diff -u -p -r1.103 ip_spd.c
--- sys/netinet/ip_spd.c 4 May 2021 09:28:04 -0000 1.103
+++ sys/netinet/ip_spd.c 7 Jul 2021 20:21:44 -0000
@@ -370,8 +370,10 @@ ipsp_spd_lookup(struct mbuf *m, int af,
/* Do we have a cached entry ? If so, check if it's still valid. */
if ((ipo->ipo_tdb) && (ipo->ipo_tdb->tdb_flags & TDBF_INVALID)) {
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
ipo->ipo_tdb = NULL;
}
@@ -419,8 +421,10 @@ ipsp_spd_lookup(struct mbuf *m, int af,
nomatchout:
/* Cached TDB was not good. */
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
ipo->ipo_tdb = NULL;
ipo->ipo_last_searched = 0;
}
@@ -447,8 +451,10 @@ ipsp_spd_lookup(struct mbuf *m, int af,
ids ? ids: ipo->ipo_ids,
&ipo->ipo_addr, &ipo->ipo_mask);
if (ipo->ipo_tdb) {
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_INSERT_TAIL(&ipo->ipo_tdb->tdb_policy_head,
ipo, ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
*error = 0;
return ipsp_spd_inp(m, af, hlen, error,
direction, tdbp, inp, ipo);
@@ -521,12 +527,14 @@ ipsp_spd_lookup(struct mbuf *m, int af,
goto nomatchin;
/* Add it to the cache. */
+ mtx_enter(&tdb_polhd_mtx);
if (ipo->ipo_tdb)
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
ipo, ipo_tdb_next);
ipo->ipo_tdb = tdbp;
TAILQ_INSERT_TAIL(&tdbp->tdb_policy_head, ipo,
ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
*error = 0;
return ipsp_spd_inp(m, af, hlen, error, direction,
tdbp, inp, ipo);
@@ -550,8 +558,10 @@ ipsp_spd_lookup(struct mbuf *m, int af,
goto skipinputsearch;
/* Not applicable, unlink. */
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
ipo->ipo_last_searched = 0;
ipo->ipo_tdb = NULL;
}
@@ -566,9 +576,12 @@ ipsp_spd_lookup(struct mbuf *m, int af,
dignore ? &ssrc : &ipo->ipo_dst,
ipo->ipo_sproto, ipo->ipo_ids,
&ipo->ipo_addr, &ipo->ipo_mask);
- if (ipo->ipo_tdb)
+ if (ipo->ipo_tdb) {
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_INSERT_TAIL(&ipo->ipo_tdb->tdb_policy_head,
ipo, ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
+ }
}
skipinputsearch:
@@ -638,9 +651,12 @@ ipsec_delete_policy(struct ipsec_policy
rn_delete(&ipo->ipo_addr, &ipo->ipo_mask, rnh, rn) == NULL)
return (ESRCH);
- if (ipo->ipo_tdb != NULL)
+ if (ipo->ipo_tdb != NULL) {
+ mtx_enter(&tdb_polhd_mtx);
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
ipo_tdb_next);
+ mtx_leave(&tdb_polhd_mtx);
+ }
while ((ipa = TAILQ_FIRST(&ipo->ipo_acquires)) != NULL)
ipsp_delete_acquire(ipa);