Thanks Attilio, I set up dcons target/host pair. Target is 7.2-STABLE and host is 6.4-STABLE.
Dcons session was recorded with script. http://www.heimat.gr.jp/localhost/dcons.log >>>>> In <3bbf2fe10906060749xbbc2f2fy4c09f67711a...@mail.gmail.com> >>>>> Attilio Rao <atti...@freebsd.org> wrote: > 2) Once you get the deadlock break in the DDB debugger KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a vfs_badlock() at vfs_badlock+0x95 assert_vop_elocked() at assert_vop_elocked+0x64 VOP_WRITE_APV() at VOP_WRITE_APV+0x155 vn_write() at vn_write+0x1ce dofilewrite() at dofilewrite+0x85 kern_pwritev() at kern_pwritev+0x66 pwrite() at pwrite+0x58 syscall() at syscall+0x1ce Xfast_syscall() at Xfast_syscall+0xab --- syscall (476, FreeBSD ELF64, pwrite), rip = 0x80074d17c, rsp = 0x7fffffffda98, rbp = 0xb4000 --- VOP_WRITE: 0xffffff004a26c000 is not exclusive locked but should be KDB: enter: lock violation [thread pid 29756 tid 100177 ] Stopped at kdb_enter_why+0x3d: movq $0,0x626418(%rip) > 3) Once you are in DDB informations which could be very useful are: db> show allpcpu db> show alllocks db> show lockedvnods db> ps db> allthreads > Note that this is a lot of printout so you won't be able of collecting > all these informations if not with a serial connection. db> show allpcpu Current CPU: 0 cpuid = 0 curthread = 0xffffff0114476ab0: pid 29756 "expireover" curpcb = 0xffffff80a3f7ed40 fpcurthread = none idlethread = 0xffffff0001589720: pid 11 "idle: cpu0" spin locks held: db> show alllocks Process 29784 (spamc) thread 0xffffff004afac720 (100170) exclusive sx so_rcv_sx r = 0 (0xffffff0065383c40) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 29413 (perl) thread 0xffffff004afb0720 (100162) exclusive sx so_rcv_sx r = 0 (0xffffff00656163d0) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 29409 (perl) thread 0xffffff01144ea390 (100175) exclusive sx so_rcv_sx r = 0 (0xffffff004a210970) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 29406 (perl) thread 0xffffff0065899000 (100196) exclusive sx so_rcv_sx r = 0 (0xffffff00655cbc40) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 1497 (perl5.8.9) thread 0xffffff0013dedab0 (100113) exclusive sx so_rcv_sx r = 0 (0xffffff004a0b9970) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 1496 (ninpaths) thread 0xffffff001333a720 (100082) exclusive sx so_rcv_sx r = 0 (0xffffff004a33d100) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 1494 (perl5.8.9) thread 0xffffff0013dcf720 (100107) exclusive sx so_rcv_sx r = 0 (0xffffff004a33d6a0) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 Process 1397 (python2.5) thread 0xffffff0013dd1ab0 (100098) exclusive sx so_rcv_sx r = 0 (0xffffff00655cc3d0) locked @ /usr/src/sys/kern/uipc_sockbuf.c:148 db> show lockedvnods Locked vnodes 0xffffff004a26c000: tag ufs, type VREG usecount 5, writecount 2, refcount 587 mountedhere 0 flags (VI_OBJDIRTY) v_object 0xffffff004a1fad80 ref 3 pages 4604 lock type ufs: SHARED (count 1) ino 3157042, on dev ad4s1f db> ps pid ppid pgrp uid state wmesg wchan cmd 29803 1443 1406 8 S nanslp 0xffffffff80b58688 sleep 29797 1534 1534 0 S connec 0xffffff00654815fe sendmail 29785 1499 1499 110 S lockf 0xffffff0065a19000 perl [snip] db> allthreads No such command db> panic panic: from debugger cpuid = 0 KDB: stack backtrace: db_trace_self_wrapper() at db_trace_self_wrapper+0x2a panic() at panic+0x182 db_panic() at db_panic+0x17 db_command() at db_command+0x1ef db_command_loop() at db_command_loop+0x50 db_trap() at db_trap+0x89 kdb_trap() at kdb_trap+0x95 trap() at trap+0x295 calltrap() at calltrap+0x8 --- trap 0x3, rip = 0xffffffff8054694d, rsp = 0xffffff80a3f7e850, rbp = 0xffffff80a3f7e870 --- kdb_enter_why() at kdb_enter_why+0x3d assert_vop_elocked() at assert_vop_elocked+0x64 VOP_WRITE_APV() at VOP_WRITE_APV+0x155 vn_write() at vn_write+0x1ce dofilewrite() at dofilewrite+0x85 kern_pwritev() at kern_pwritev+0x66 pwrite() at pwrite+0x58 syscall() at syscall+0x1ce Xfast_syscall() at Xfast_syscall+0xab --- syscall (476, FreeBSD ELF64, pwrite), rip = 0x80074d17c, rsp = 0x7fffffffda98, rbp = 0xb4000 --- Uptime: 3h20m7s Physical memory: 6121 MB Dumping 1730 MB: (CTRL-C to abort) ... Dump complete Automatic reboot in 15 seconds - press a key on the console to abort --> Press a key on the console to reboot, --> or switch off the system now. Rebooting... And, here is a backtrace. (kgdb) bt #0 doadump () at pcpu.h:195 #1 0xffffffff80517e73 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:418 #2 0xffffffff805182fc in panic (fmt=Variable "fmt" is not available. ) at /usr/src/sys/kern/kern_shutdown.c:574 #3 0xffffffff801c6817 in db_panic (addr=Variable "addr" is not available. ) at /usr/src/sys/ddb/db_command.c:446 #4 0xffffffff801c710f in db_command (last_cmdp=0xffffffff80b21088, cmd_table=0x0, dopager=1) at /usr/src/sys/ddb/db_command.c:413 #5 0xffffffff801c7320 in db_command_loop () at /usr/src/sys/ddb/db_command.c:466 #6 0xffffffff801c8c59 in db_trap (type=Variable "type" is not available. ) at /usr/src/sys/ddb/db_main.c:228 #7 0xffffffff80546775 in kdb_trap (type=3, code=0, tf=0xffffff80a3f7e7a0) at /usr/src/sys/kern/subr_kdb.c:524 #8 0xffffffff807d6925 in trap (frame=0xffffff80a3f7e7a0) at /usr/src/sys/amd64/amd64/trap.c:531 #9 0xffffffff807ba53e in calltrap () at /usr/src/sys/amd64/amd64/exception.S:209 #10 0xffffffff8054694d in kdb_enter_why (why=0xffffffff808caa99 "vfslock", msg=0xa <Address 0xa out of bounds>) at cpufunc.h:63 #11 0xffffffff8059d804 in assert_vop_elocked (vp=0xffffff004a26c000, str=0xffffffff80907391 "VOP_WRITE") at /usr/src/sys/kern/vfs_subr.c:3679 #12 0xffffffff8081da55 in VOP_WRITE_APV (vop=Variable "vop" is not available. ) at vnode_if.c:702 #13 0xffffffff805aad6e in vn_write (fp=0xffffff0003a13d80, uio=0xffffff80a3f7eb00, active_cred=Variable "active_cred" is not available. ) at vnode_if.h:373 ---Type <return> to continue, or q <return> to quit--- #14 0xffffffff80559215 in dofilewrite (td=0xffffff0114476ab0, fd=15, fp=0xffffff0003a13d80, auio=0xffffff80a3f7eb00, offset=Variable "offset" is not available. ) at file.h:257 #15 0xffffffff80559386 in kern_pwritev (td=0xffffff0114476ab0, fd=15, auio=0xffffff80a3f7eb00, offset=770655) at /usr/src/sys/kern/sys_generic.c:450 #16 0xffffffff80559418 in pwrite (td=Variable "td" is not available. ) at /usr/src/sys/kern/sys_generic.c:351 #17 0xffffffff807d61de in syscall (frame=0xffffff80a3f7ec80) at /usr/src/sys/amd64/amd64/trap.c:900 #18 0xffffffff807ba74b in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:330 #19 0x000000080074d17c in ?? () Previous frame inner to this frame (corrupt stack?) > 4) Dump the content so that we can further look at locks structure > states once we identify something useful (ideally, keeping the machine > up in DDB for that would be very useful, but often not viable) The target machine is my SMTP server and it's hard to keep it in DDB. > Let me know. I hope this helps debugging. Thanks. -- NAKAJI Hiroyuki _______________________________________________ freebsd-stable@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-stable To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"