On 4.12.2021. 10:41, Hrvoje Popovski wrote:
> On 3.12.2021. 20:35, Alexander Bluhm wrote:
>> Hi,
>>
>> After implementing MP safe refcounting for IPsec TDB, I wonder how
>> stable my diff for forwarding on multiple CPU is.
>>
>> Note that IPsec still has the workaround to disable multiple queues.
>> We do not have a mutex that protects the TDB fields yet. We have
>> only fixed the memory management.
>>
>> My goal is to get real MP pressure on the lower part of the IP
>> network stack. Only this will show remaining bugs.
> Hi,
>
> with only plain forwarding it seems that everything works just fine, but
> with ipsec i'm getting this panic. i will leave ddb console active if
> something else will be needed
Now with WITNESS
r620-2# uvm_fault(0xffffffff8226ee78, 0x137, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at ipsp_spd_lookup+0xa2f: movq %rax,0(%rcx)
TID PID UID PRFLAGS PFLAGS CPU COMMAND
258332 43705 0 0x14000 0x200 4 softnet
*440260 99913 0 0x14000 0x200 3K softnet
63340 24364 0 0x14000 0x200 2 softnet
ipsp_spd_lookup(fffffd80a46ed700,2,14,2,0,0,dcfe3c78989b6cd1,fffffd80a46ed700)
at ipsp_spd_lookup+0xa2f
ip_output_ipsec_lookup(fffffd80a46ed700,14,0,ffff800022c71248,0) at
ip_output_ipsec_lookup+0x4c
ip_output(fffffd80a46ed700,0,ffff800022c71408,1,0,0,6db6075421f27eb8) at
ip_output+0x39d
ip_forward(fffffd80a46ed700,ffff800000087048,fffffd83b4374cb0,0) at
ip_forward+0x26a
ip_input_if(ffff800022c71548,ffff800022c71554,4,0,ffff800000087048) at
ip_input_if+0x353
ipv4_input(ffff800000087048,fffffd80a46ed700) at ipv4_input+0x39
ether_input(ffff800000087048,fffffd80a46ed700) at ether_input+0x3aa
if_input_process(ffff800000087048,ffff800022c71638) at if_input_process+0x92
ifiq_process(ffff800000086e00) at ifiq_process+0x69
taskq_thread(ffff800000030200) at taskq_thread+0x9f
end trace frame: 0x0, count: 5
https://www.openbsd.org/ddb.html describes the minimum info required in
bug reports. Insufficient info makes it difficult to find and fix bugs.
ddb{3}>
ddb{3}> show reg
rdi 0
rsi 0xffff80000145d018
rbp 0xffff800022c71150
rbx 0
rdx 0x1
rcx 0x137
rax 0xffffffffffffffff
r8 0xfffffd839a222d30
r9 0xffff800001453904
r10 0xffff80000142f240
r11 0xebc2ae563992db8e
r12 0xffff800022c71178
r13 0
r14 0x1
r15 0
rip 0xffffffff811f56cf ipsp_spd_lookup+0xa2f
cs 0x8
rflags 0x10213 __ALIGN_SIZE+0xf213
rsp 0xffff800022c71040
ss 0x10
ipsp_spd_lookup+0xa2f: movq %rax,0(%rcx)
ddb{3}>
ddb{3}> show locks
exclusive kernel_lock &kernel_lock r = 0 (0xffffffff822b3ce8)
#0 witness_lock+0x333
#1 kpageflttrap+0x172
#2 kerntrap+0x91
#3 alltraps_kern_meltdown+0x7b
#4 ipsp_spd_lookup+0xa2f
#5 ip_output_ipsec_lookup+0x4c
#6 ip_output+0x39d
#7 ip_forward+0x26a
#8 ip_input_if+0x353
#9 ipv4_input+0x39
#10 ether_input+0x3aa
#11 if_input_process+0x92
#12 ifiq_process+0x69
#13 taskq_thread+0x9f
#14 proc_trampoline+0x1c
shared rwlock netlock r = 0 (0xffffffff8217f598)
#0 witness_lock+0x333
#1 rw_enter+0x27f
#2 if_input_process+0x75
#3 ifiq_process+0x69
#4 taskq_thread+0x9f
#5 proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030270)
#0 witness_lock+0x333
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
ddb{3}>
ddb{3}> show all locks
Process 43705 (softnet) thread 0xffff8000ffffe540 (258332)
shared rwlock netlock r = 0 (0xffffffff8217f598)
#0 witness_lock+0x333
#1 rw_enter+0x27f
#2 if_input_process+0x75
#3 ifiq_process+0x69
#4 taskq_thread+0x9f
#5 proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030370)
#0 witness_lock+0x333
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
Process 99913 (softnet) thread 0xffff8000ffffe000 (440260)
exclusive kernel_lock &kernel_lock r = 0 (0xffffffff822b3ce8)
#0 witness_lock+0x333
#1 kpageflttrap+0x172
#2 kerntrap+0x91
#3 alltraps_kern_meltdown+0x7b
#4 ipsp_spd_lookup+0xa2f
#5 ip_output_ipsec_lookup+0x4c
#6 ip_output+0x39d
#7 ip_forward+0x26a
#8 ip_input_if+0x353
#9 ipv4_input+0x39
#10 ether_input+0x3aa
#11 if_input_process+0x92
#12 ifiq_process+0x69
#13 taskq_thread+0x9f
#14 proc_trampoline+0x1c
shared rwlock netlock r = 0 (0xffffffff8217f598)
#0 witness_lock+0x333
#1 rw_enter+0x27f
#2 if_input_process+0x75
#3 ifiq_process+0x69
#4 taskq_thread+0x9f
#5 proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030270)
#0 witness_lock+0x333
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
Process 39040 (softnet) thread 0xffff8000ffffe2a0 (358256)
shared rwlock softnet r = 0 (0xffff800000030170)
#0 witness_lock+0x333
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
Process 24364 (softnet) thread 0xffff8000ffffe7e0 (63340)
shared rwlock netlock r = 0 (0xffffffff8217f598)
#0 witness_lock+0x333
#1 rw_enter+0x27f
#2 if_input_process+0x75
#3 ifiq_process+0x69
#4 taskq_thread+0x9f
#5 proc_trampoline+0x1c
shared rwlock softnet r = 0 (0xffff800000030070)
#0 witness_lock+0x333
#1 taskq_thread+0x92
#2 proc_trampoline+0x1c
ddb{3}>
ddb{3}> show tdb
0xffffffff811f56cf: 000041bd (unknown address family)->(unknown address
family)
:15 #-1958084609 74f68445
ddb{3}> show tdb /f 0xffffffff811f56cf
tdb at 0xffffffff811f56cf
hnext: 0x130808b49018948
dnext: 0x138888b490000
snext: 0x3880c74901894800
inext: 0x49ffffffff000001
onext: 0xffff0000013080c7
xform: 0x8b4c002691dbe800
refcnt: -1958084609
encalgxform: 0x11080c7498845
authalgxform: 0x80c7490000000000
compalgxform: 0xf8
flags:
74f68445<UNIQUE,BYTES,DELETED,SOFT_FIRSTUSE,UDPENCAP,PFSYNC
,ESN>
seq: -34635
exp_allocations: -2133708289
soft_allocations: 1207998333
cur_allocations: 949306639
exp_bytes: -53887557573804033
soft_bytes: -7572652700634382337
cur_bytes: -8986685076615987194
exp_timeout: 1274901163972733
soft_timeout: -1044682726943537152
established: -23376416473088
first_use: 1207830954720511
soft_first_use: -2045619644365824
exp_first_use: -956873529326089985
last_used: -528463706791608320
last_marked: -5151950549254864897
cryptoid: 434571561984018687
tdb_spi: 000041bd
amxkeylen: 13
emxkeylen: 0
ivlen: 49285
sproto: 15
wnd: 133
satype: 76
updates: 248
dst: (unknown address family)
src: (unknown address family)
amxkey: 0x110b88b49ffffff
emxkey: 0x4c00268f92e80000
rpl: 76702787085813131
ids: 0xf2b8809845
ids_swapped: -578482432
mtu: 1224736758
mtutimeout: 264675222111627
udpencap_port: 0
tag: 4073
tap: 1006632963
rdomain: 1012364293
rdomain_post: -1048244478
ddb{3}>
ddb{3}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
48889 516122 49524 68 3 0x10 netlock isakmpd
49524 136902 1 0 3 0x80 netio isakmpd
7860 162241 1 0 3 0x100083 ttyin ksh
29826 116692 1 0 3 0x100098 poll cron
60841 164826 66234 95 3 0x100092 kqread smtpd
55097 2406 66234 103 3 0x100092 kqread smtpd
73287 413363 66234 95 3 0x100092 kqread smtpd
72259 146575 66234 95 3 0x100092 kqread smtpd
58288 270273 66234 95 3 0x100092 kqread smtpd
12210 125515 66234 95 3 0x100092 kqread smtpd
66234 365791 1 0 3 0x100080 kqread smtpd
27746 82886 1 0 3 0x88 poll sshd
65737 129938 1 0 3 0x100080 poll ntpd
46028 11125 33739 83 3 0x100092 poll ntpd
33739 250917 1 83 3 0x100092 poll ntpd
78022 412651 14622 73 3 0x100090 kqread syslogd
14622 207875 1 0 3 0x100082 netio syslogd
95123 271769 0 0 3 0x14200 bored smr
44020 301722 0 0 3 0x14200 pgzero zerothread
95530 514370 0 0 3 0x14200 aiodoned aiodoned
26250 378519 0 0 3 0x14200 syncer update
69845 146993 0 0 3 0x14200 cleaner cleaner
19457 388805 0 0 3 0x14200 reaper reaper
54770 476596 0 0 3 0x14200 pgdaemon pagedaemon
1279 29813 0 0 3 0x14200 usbtsk usbtask
54131 121668 0 0 3 0x14200 usbatsk usbatsk
3557 323237 0 0 3 0x40014200 acpi0 acpi0
2093 393367 0 0 7 0x40014200 idle5
83675 132444 0 0 3 0x40014200 idle4
18222 312526 0 0 3 0x40014200 idle3
733 72421 0 0 3 0x40014200 idle2
20085 265763 0 0 7 0x40014200 idle1
22638 502832 0 0 3 0x14200 bored sensors
43705 258332 0 0 7 0x14200 softnet
*99913 440260 0 0 7 0x14200 softnet
39040 358256 0 0 3 0x14200 netlock softnet
24364 63340 0 0 7 0x14200 softnet
7337 83749 0 0 3 0x14200 bored systqmp
66311 283484 0 0 3 0x14200 bored systq
41321 291222 0 0 3 0x40014200 bored softclock
74282 319709 0 0 7 0x40014200 idle0
1 12703 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{3}>
ddb{3}> mach ddbcpu 0
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffffffff8219eff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
__mp_lock(ffffffff822b3ae0) at __mp_lock+0xb3
softintr_dispatch(0) at softintr_dispatch+0x4a
Xsoftclock() at Xsoftclock+0x1f
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffffffff8219eff0) at sched_idle+0x27e
end trace frame: 0x0, count: 7
ddb{0}>
ddb{0}> mach ddbcpu 1
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff800022409ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffff800022409ff0) at sched_idle+0x27e
end trace frame: 0x0, count: 10
ddb{1}>
ddb{1}> mach ddbcpu 2
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff800022412ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
__mp_lock(ffffffff822b3ae0) at __mp_lock+0xb3
kpageflttrap(ffff800022c65210,137) at kpageflttrap+0x172
kerntrap(ffff800022c65210) at kerntrap+0x91
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
ipsp_spd_lookup(fffffd80a3a81a00,2,14,2,0,0,dcfe3c78989b6cd1,fffffd80a3a81a00)
at ipsp_spd_lookup+0xa2f
ip_output_ipsec_lookup(fffffd80a3a81a00,14,0,ffff800022c654c8,0) at
ip_output_ipsec_lookup+0x4c
ip_output(fffffd80a3a81a00,0,ffff800022c65688,1,0,0,6db6075421f27eb8) at
ip_output+0x39d
ip_forward(fffffd80a3a81a00,ffff800000087048,fffffd83b4374cb0,0) at
ip_forward+0x26a
ip_input_if(ffff800022c657c8,ffff800022c657d4,4,0,ffff800000087048) at
ip_input_if+0x353
ipv4_input(ffff800000087048,fffffd80a3a81a00) at ipv4_input+0x39
ether_input(ffff800000087048,fffffd80a3a81a00) at ether_input+0x3aa
end trace frame: 0xffff800022c658a0, count: 0
ddb{2}>
ddb{2}> mach ddbcpu 3
Stopped at ipsp_spd_lookup+0xa2f: movq %rax,0(%rcx)
ipsp_spd_lookup(fffffd80a46ed700,2,14,2,0,0,dcfe3c78989b6cd1,fffffd80a46ed700)
at ipsp_spd_lookup+0xa2f
ip_output_ipsec_lookup(fffffd80a46ed700,14,0,ffff800022c71248,0) at
ip_output_ipsec_lookup+0x4c
ip_output(fffffd80a46ed700,0,ffff800022c71408,1,0,0,6db6075421f27eb8) at
ip_output+0x39d
ip_forward(fffffd80a46ed700,ffff800000087048,fffffd83b4374cb0,0) at
ip_forward+0x26a
ip_input_if(ffff800022c71548,ffff800022c71554,4,0,ffff800000087048) at
ip_input_if+0x353
ipv4_input(ffff800000087048,fffffd80a46ed700) at ipv4_input+0x39
ether_input(ffff800000087048,fffffd80a46ed700) at ether_input+0x3aa
if_input_process(ffff800000087048,ffff800022c71638) at if_input_process+0x92
ifiq_process(ffff800000086e00) at ifiq_process+0x69
taskq_thread(ffff800000030200) at taskq_thread+0x9f
end trace frame: 0x0, count: 5
ddb{3}>
ddb{3}> mach ddbcpu 4
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff800022424ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
spllower(7) at spllower+0x50
pool_cache_get(ffffffff822a3dd8) at pool_cache_get+0x1f0
pool_get(ffffffff822a3dd8,2) at pool_get+0x5e
m_clget(0,2,802) at m_clget+0x1c5
ixgbe_get_buf(ffff80000008a1d0,9) at ixgbe_get_buf+0xa3
ixgbe_rxfill(ffff80000008a1d0) at ixgbe_rxfill+0xae
ixgbe_queue_intr(ffff800000085880) at ixgbe_queue_intr+0x4f
intr_handler(ffff800022c76a60,ffff80000007d480) at intr_handler+0x6e
Xintr_ioapic_edge1_untramp() at Xintr_ioapic_edge1_untramp+0x18f
__mp_lock(ffffffff822b3ae0) at __mp_lock+0xb3
esp_output(fffffd80a35edb00,ffff80000145d018,14,9) at esp_output+0x82d
end trace frame: 0xffff800022c76c60, count: 0
ddb{4}>
ddb{4}> mach ddbcpu 5
Stopped at x86_ipi_db+0x12: leave
x86_ipi_db(ffff80002242dff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x281
sched_idle(ffff80002242dff0) at sched_idle+0x27e
end trace frame: 0x0, count: 10
ddb{5}>