On Wed, Apr 26, 2023 at 11:17:32AM +0200, Alexander Bluhm wrote:
> On Tue, Apr 25, 2023 at 11:57:09PM +0300, Vitaliy Makkoveev wrote:
> > On Tue, Apr 25, 2023 at 11:44:34AM +0200, Alexander Bluhm wrote:
> > > Hi,
> > >
> > > Mutex arp_mtx protects the llinfo_arp la_...  fields.  So kernel
> > > lock is only needed for changing the route rt_flags.
> > >
> > > Of course there is a race between checking and setting rt_flags.
> > > But the other checks of the RTF_REJECT flags were without kernel
> > > lock before.  This does not cause trouble, the worst thing that may
> > > happen is to wait another expire time for ARP retry.  My diff does
> > > not make it worse, reading rt_flags and rt_expire is done without
> > > lock anyway.
> > >
> > > The kernel lock is needed to change rt_flags.  Testing without
> > > KERNEL_LOCK() caused crashes.
> > >
> >
> > Hi,
> >
> > I'm interested whether the system is stable with the diff below.  If so,
> > we could avoid the kernel lock in arpresolve().
> 
> I could not crash it.

I was too fast.  Just after writing this mail I restarted the test.

[0] 0:arp- 1:ksh*                        "ot31.obsd-lab.genua.d" 12:00 26-Apr-23
panic: pool_do_get: art_node free list modified: 
page 0xfffffd8747128000; item addr 0xfffffd8747128410; offset 
0x0=0x182f4660f2a7188a != 0x182f4660f2a71889
Stopped at      db_enter+0x14:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  45805  80626      0     0x14000      0x200    3  reaper
 353816  99629      0     0x14000      0x200    1  softnet
 487701  10647      0     0x14000      0x200    2  softnet
 152789  43620      0     0x14000      0x200    7  softnet
*356742  68683      0     0x14000      0x200    5  softnet
db_enter() at db_enter+0x14
panic(ffffffff8213f5d0) at panic+0xc3
pool_do_get(ffffffff824ae060,a,ffff8000247b71b4) at pool_do_get+0x321
pool_get(ffffffff824ae060,a) at pool_get+0x9a
art_get(ffff8000027ceac0,20) at art_get+0x30
rtable_insert(0,ffff8000027ceac0,0,ffff8000247b72f0,3,fffffd8745e4a948) at rtab
le_insert+0x1a2
rtrequest(b,ffff8000247b73f8,3,ffff8000247b7498,0) at rtrequest+0x613
rt_clone(ffff8000247b7500,ffff8000247b7558,0) at rt_clone+0x77
rtalloc_mpath(ffff8000247b7558,fffffd800369aad8,0) at rtalloc_mpath+0x50
in_ouraddr(fffffd80a94fcd00,ffff80000077e048,ffff8000247b75d8) at in_ouraddr+0x
88
ip_input_if(ffff8000247b7678,ffff8000247b7684,4,0,ffff80000077e048) at ip_input
_if+0x1f0
ipv4_input(ffff80000077e048,fffffd80a94fcd00) at ipv4_input+0x3d
ether_input(ffff80000077e048,fffffd80a94fcd00) at ether_input+0x3b5
if_input_process(ffff80000077e048,ffff8000247b7768) at if_input_process+0x6f
end trace frame: 0xffff8000247b77b0, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

ddb{5}> show panic
*cpu5: pool_do_get: art_node free list modified: page 0xfffffd8747128000; item a
ddr 0xfffffd8747128410; offset 0x0=0x182f4660f2a7188a != 0x182f4660f2a71889

ddb{5}> show register
rdi                                0
rsi                             0x14
rbp               0xffff8000247b7060
rbx                                0
rdx               0xc000000000000000
rcx                            0x286
rax                             0x9c
r8                 0x101010101010101
r9                                 0
r10               0xcdd678d0954ec026
r11               0x92611e3f4c85263e
r12               0xffff80002252e990
r13                                0
r14                                0
r15               0xffffffff8213f5d0    cy_pio_rec+0x1ea86
rip               0xffffffff81b0f124    db_enter+0x14
cs                               0x8
rflags                         0x282
rsp               0xffff8000247b7060
ss                              0x10
db_enter+0x14:  popq    %rbp

ddb{5}> trace
db_enter() at db_enter+0x14
panic(ffffffff8213f5d0) at panic+0xc3
pool_do_get(ffffffff824ae060,a,ffff8000247b71b4) at pool_do_get+0x321
pool_get(ffffffff824ae060,a) at pool_get+0x9a
art_get(ffff8000027ceac0,20) at art_get+0x30
rtable_insert(0,ffff8000027ceac0,0,ffff8000247b72f0,3,fffffd8745e4a948) at rtab
le_insert+0x1a2
rtrequest(b,ffff8000247b73f8,3,ffff8000247b7498,0) at rtrequest+0x613
rt_clone(ffff8000247b7500,ffff8000247b7558,0) at rt_clone+0x77
rtalloc_mpath(ffff8000247b7558,fffffd800369aad8,0) at rtalloc_mpath+0x50
in_ouraddr(fffffd80a94fcd00,ffff80000077e048,ffff8000247b75d8) at in_ouraddr+0x
88
ip_input_if(ffff8000247b7678,ffff8000247b7684,4,0,ffff80000077e048) at ip_input
_if+0x1f0
ipv4_input(ffff80000077e048,fffffd80a94fcd00) at ipv4_input+0x3d
ether_input(ffff80000077e048,fffffd80a94fcd00) at ether_input+0x3b5
if_input_process(ffff80000077e048,ffff8000247b7768) at if_input_process+0x6f
ifiq_process(ffff800000782400) at ifiq_process+0x75
taskq_thread(ffff800000036000) at taskq_thread+0x100
end trace frame: 0x0, count: -16

ddb{5}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
  3208  212022  39561      0  3    0x10008b  sigsusp       timeout
  6160  324932   4896      0  3    0x100083  ttyin         ksh
 39561  309293   4896      0  3    0x10008b  sigsusp       ksh
  4896   16573      1      0  3    0x100080  kqread        tmux
 91788  275888  20986      0  3    0x100083  kqread        tmux
 20986   27136      1      0  3    0x10008b  sigsusp       ksh
 59912  394974      1      0  3    0x100098  kqread        cron
 25465  470631      1     99  3   0x1100090  kqread        sndiod
 55473  426797      1    110  3    0x100090  kqread        sndiod
 59948  369086  93818     95  3   0x1100092  kqread        smtpd
 91703  135835  93818    103  3   0x1100092  kqread        smtpd
 96840  246769  93818     95  3   0x1100092  kqread        smtpd
 52992  383912  93818     95  3    0x100092  kqread        smtpd
 99867  251002  93818     95  3   0x1100092  kqread        smtpd
 82244  385303  93818     95  3   0x1100092  kqread        smtpd
 93818  366740      1      0  3    0x100080  kqread        smtpd
 73222  145148  94559     89  3   0x1100092  kqread        relayd
 23499  417534  94559     89  3   0x1100092  kqread        relayd
  7002  187795  94559     89  3   0x1100092  kqread        relayd
  1328   96961  94559     89  3   0x1100092  kqread        relayd
 35216  236120  94559     89  3   0x1100092  kqread        relayd
 44997   23635  94559     89  3   0x1100092  kqread        relayd
 82930   19018  94559     89  3   0x1100092  kqread        relayd
 42026   16480  94559     89  3   0x1100092  kqread        relayd
 94559  253040      1      0  3        0x80  kqread        relayd
 89865  410855      1      0  3        0x88  kqread        sshd
 22339  294875      0      0  3     0x14280  nfsidl        nfsio
 89599   51352      0      0  3     0x14280  nfsidl        nfsio
 63849  361892      0      0  3     0x14280  nfsidl        nfsio
 30664  357919      0      0  3     0x14280  nfsidl        nfsio
 10499  399494      1      0  3    0x100080  kqread        ntpd
 97199  131290  30807     83  3    0x100092  kqread        ntpd
 30807  203991      1     83  3   0x1100092  kqread        ntpd
 26198  299506  99348     74  3   0x1100092  bpf           pflogd
 99348  201119      1      0  3        0x80  netio         pflogd
 96301  342337  50719     73  3   0x1100090  kqread        syslogd
 50719  204195      1      0  3    0x100082  netio         syslogd
 56409  269750  82339     77  3    0x100092  kqread        dhcpleased
 38043  210823  82339     77  3    0x100092  kqread        dhcpleased
 82339  321261      1      0  3        0x80  kqread        dhcpleased
 78726  171179  42866    115  3    0x100092  kqread        slaacd
 64825  522042  42866    115  3    0x100092  kqread        slaacd
 42866  121490      1      0  3    0x100080  kqread        slaacd
 27056  523309      0      0  3     0x14200  bored         smr
 60893   19168      0      0  3     0x14200  pgzero        zerothread
 53700  375669      0      0  3     0x14200  aiodoned      aiodoned
 42976   46427      0      0  3     0x14200  syncer        update
 71949  418239      0      0  3     0x14200  cleaner       cleaner
 80626   45805      0      0  7     0x14200                reaper
 54869   43057      0      0  3     0x14200  pgdaemon      pagedaemon
 84624  479667      0      0  3     0x14200  usbtsk        usbtask
 34897   79859      0      0  3     0x14200  usbatsk       usbatsk
 66093    3735      0      0  3  0x40014200  acpi0         acpi0
 86817  206525      0      0  3  0x40014200                idle7
 91530   81236      0      0  7  0x40014200                idle6
 26856  190910      0      0  3  0x40014200                idle5
 59400   12485      0      0  7  0x40014200                idle4
 46837   20343      0      0  3  0x40014200                idle3
 13360   60775      0      0  3  0x40014200                idle2
 75344  252894      0      0  3  0x40014200                idle1
 59939  347814      0      0  3     0x14200  bored         sensors
 99629  353816      0      0  7     0x14200                softnet
 10647  487701      0      0  7     0x14200                softnet
 43620  152789      0      0  7     0x14200                softnet
*68683  356742      0      0  7     0x14200                softnet
 13264  491700      0      0  3     0x14200  arttfini      systqmp
 59941  489236      0      0  3     0x14200  bored         systq
 17414  459279      0      0  3  0x40014200  bored         softclock
 84124  362612      0      0  7  0x40014200                idle0
     1   38941      0      0  3        0x82  wait          init
     0       0     -1      0  3     0x10200  scheduler     swapper

bluhm

Reply via email to