On 21 Jul, G. Paul Ziemba wrote:
> GENERIC kernel r321349 results in the following about a minute after
> multiuser boot completes.
> 
> What additional information should I provide to assist in debugging?
> 
> Many thanks!
> 
> [Extracted from /var/crash/core.txt.NNN]
> 
> KDB: stack backtrace:
> #0 0xffffffff810f6ed7 at kdb_backtrace+0xa7
> #1 0xffffffff810872a9 at vpanic+0x249
> #2 0xffffffff81087060 at vpanic+0
> #3 0xffffffff817d9aca at dblfault_handler+0x10a
> #4 0xffffffff817ae93c at Xdblfault+0xac
> #5 0xffffffff810cf76e at cpu_search_lowest+0x35e
> #6 0xffffffff810cf76e at cpu_search_lowest+0x35e
> #7 0xffffffff810d5b36 at sched_lowest+0x66
> #8 0xffffffff810d1d92 at sched_pickcpu+0x522
> #9 0xffffffff810d2b03 at sched_add+0xd3
> #10 0xffffffff8101df5c at intr_event_schedule_thread+0x18c
> #11 0xffffffff8101ddb0 at swi_sched+0xa0
> #12 0xffffffff81261643 at netisr_queue_internal+0x1d3
> #13 0xffffffff81261212 at netisr_queue_src+0x92
> #14 0xffffffff81261677 at netisr_queue+0x27
> #15 0xffffffff8123da5a at if_simloop+0x20a
> #16 0xffffffff8123d83b at looutput+0x22b
> #17 0xffffffff8131c4c6 at ip_output+0x1aa6
> 
> doadump (textdump=1) at /usr/src/sys/kern/kern_shutdown.c:298
> 298             dumptid = curthread->td_tid;
> (kgdb) #0  doadump (textdump=1) at /usr/src/sys/kern/kern_shutdown.c:298
> #1  0xffffffff810867e8 in kern_reboot (howto=260)
>     at /usr/src/sys/kern/kern_shutdown.c:366
> #2  0xffffffff810872ff in vpanic (fmt=0xffffffff81e5f7e0 "double fault", 
>     ap=0xfffffe0839778ec0) at /usr/src/sys/kern/kern_shutdown.c:759
> #3  0xffffffff81087060 in panic (fmt=0xffffffff81e5f7e0 "double fault")
>     at /usr/src/sys/kern/kern_shutdown.c:690
> #4  0xffffffff817d9aca in dblfault_handler (frame=0xfffffe0839778f40)
>     at /usr/src/sys/amd64/amd64/trap.c:828
> #5  <signal handler called>
> #6  0xffffffff810cf422 in cpu_search_lowest (
>     cg=0xffffffff826ccd98 <group+280>, 
>     low=<error reading variable: Cannot access memory at address 
> 0xfffffe085cfa4
> ff8>) at /usr/src/sys/kern/sched_ule.c:782
> #7  0xffffffff810cf76e in cpu_search (cg=0xffffffff826cccb8 <group+56>, 
>     low=0xfffffe085cfa53b8, high=0x0, match=1)
>     at /usr/src/sys/kern/sched_ule.c:710
> #8  cpu_search_lowest (cg=0xffffffff826cccb8 <group+56>, 
>     low=0xfffffe085cfa53b8) at /usr/src/sys/kern/sched_ule.c:783
> #9  0xffffffff810cf76e in cpu_search (cg=0xffffffff826ccc80 <group>, 
>     low=0xfffffe085cfa5430, high=0x0, match=1)
>     at /usr/src/sys/kern/sched_ule.c:710
> #10 cpu_search_lowest (cg=0xffffffff826ccc80 <group>, low=0xfffffe085cfa5430)
>     at /usr/src/sys/kern/sched_ule.c:783
> #11 0xffffffff810d5b36 in sched_lowest (cg=0xffffffff826ccc80 <group>, 
>     mask=..., pri=28, maxload=2147483647, prefer=4)
>     at /usr/src/sys/kern/sched_ule.c:815
> #12 0xffffffff810d1d92 in sched_pickcpu (td=0xfffff8000a3a9000, flags=4)
>     at /usr/src/sys/kern/sched_ule.c:1292
> #13 0xffffffff810d2b03 in sched_add (td=0xfffff8000a3a9000, flags=4)
>     at /usr/src/sys/kern/sched_ule.c:2447
> #14 0xffffffff8101df5c in intr_event_schedule_thread (ie=0xfffff80007e7ae00)
>     at /usr/src/sys/kern/kern_intr.c:917
> #15 0xffffffff8101ddb0 in swi_sched (cookie=0xfffff8000a386880, flags=0)
>     at /usr/src/sys/kern/kern_intr.c:1163
> #16 0xffffffff81261643 in netisr_queue_internal (proto=1, 
>     m=0xfffff80026d00500, cpuid=0) at /usr/src/sys/net/netisr.c:1022
> #17 0xffffffff81261212 in netisr_queue_src (proto=1, source=0, 
>     m=0xfffff80026d00500) at /usr/src/sys/net/netisr.c:1056
> #18 0xffffffff81261677 in netisr_queue (proto=1, m=0xfffff80026d00500)
>     at /usr/src/sys/net/netisr.c:1069
> #19 0xffffffff8123da5a in if_simloop (ifp=0xfffff800116eb000, 
>     m=0xfffff80026d00500, af=2, hlen=0) at /usr/src/sys/net/if_loop.c:358
> #20 0xffffffff8123d83b in looutput (ifp=0xfffff800116eb000, 
>     m=0xfffff80026d00500, dst=0xfffff80026ed6550, ro=0xfffff80026ed6530)
>     at /usr/src/sys/net/if_loop.c:265
> #21 0xffffffff8131c4c6 in ip_output (m=0xfffff80026d00500, opt=0x0, 
>     ro=0xfffff80026ed6530, flags=0, imo=0x0, inp=0xfffff80026ed63a0)
>     at /usr/src/sys/netinet/ip_output.c:655
> #22 0xffffffff8142e1c7 in tcp_output (tp=0xfffff80026eb2820)
>     at /usr/src/sys/netinet/tcp_output.c:1447
> #23 0xffffffff81447700 in tcp_usr_send (so=0xfffff80011ec2360, flags=0, 
>     m=0xfffff80026d14d00, nam=0x0, control=0x0, td=0xfffff80063ba1000)
>     at /usr/src/sys/netinet/tcp_usrreq.c:967
> #24 0xffffffff811776f1 in sosend_generic (so=0xfffff80011ec2360, addr=0x0, 
>     uio=0x0, top=0xfffff80026d14d00, control=0x0, flags=0, 
>     td=0xfffff80063ba1000) at /usr/src/sys/kern/uipc_socket.c:1360
> #25 0xffffffff811779bd in sosend (so=0xfffff80011ec2360, addr=0x0, uio=0x0, 
>     top=0xfffff80026d14d00, control=0x0, flags=0, td=0xfffff80063ba1000)
>     at /usr/src/sys/kern/uipc_socket.c:1405
> #26 0xffffffff815276a2 in clnt_vc_call (cl=0xfffff80063ca0980, 
>     ext=0xfffffe085cfa6e38, proc=4, args=0xfffff80026c3bc00, 
>     resultsp=0xfffffe085cfa7110, utimeout=...)
>     at /usr/src/sys/rpc/clnt_vc.c:413
> #27 0xffffffff8152391c in clnt_reconnect_call (cl=0xfffff80063ca0c00, 
>     ext=0xfffffe085cfa6e38, proc=4, args=0xfffff80026c3bc00, 
>     resultsp=0xfffffe085cfa7110, utimeout=...)
>     at /usr/src/sys/rpc/clnt_rc.c:271
> #28 0xffffffff80e75628 in newnfs_request (nd=0xfffffe085cfa7110, 
>     nmp=0xfffff80007e79c00, clp=0x0, nrp=0xfffff80007e79d28, 
>     vp=0xfffff80011d9b588, td=0xfffff80063ba1000, cred=0xfffff800118c0100, 
>     prog=100003, vers=3, retsum=0x0, toplevel=1, xidp=0x0, dssep=0x0)
>     at /usr/src/sys/fs/nfs/nfs_commonkrpc.c:760
> #29 0xffffffff80ee87f1 in nfscl_request (nd=0xfffffe085cfa7110, 
>     vp=0xfffff80011d9b588, p=0xfffff80063ba1000, cred=0xfffff800118c0100, 
>     stuff=0x0) at /usr/src/sys/fs/nfsclient/nfs_clport.c:952
> #30 0xffffffff80ea865c in nfsrpc_accessrpc (vp=0xfffff80011d9b588, mode=63, 
>     cred=0xfffff800118c0100, p=0xfffff80063ba1000, nap=0xfffffe085cfa72e0, 
>     attrflagp=0xfffffe085cfa73c0, rmodep=0xfffffe085cfa73b4, stuff=0x0)
>     at /usr/src/sys/fs/nfsclient/nfs_clrpcops.c:243
> #31 0xffffffff80ed9ec9 in nfs34_access_otw (vp=0xfffff80011d9b588, wmode=63, 
>     td=0xfffff80063ba1000, cred=0xfffff800118c0100, 
>     retmode=0xfffffe085cfa7540) at /usr/src/sys/fs/nfsclient/nfs_clvnops.c:283
> #32 0xffffffff80ecfb64 in nfs_access (ap=0xfffffe085cfa75f8)
>     at /usr/src/sys/fs/nfsclient/nfs_clvnops.c:426
> #33 0xffffffff81a539d4 in VOP_ACCESS_APV (
>     vop=0xffffffff822ff8b8 <newnfs_vnodeops>, a=0xfffffe085cfa75f8)
>     at vnode_if.c:601
> #34 0xffffffff80eda726 in VOP_ACCESS (vp=0xfffff80011d9b588, accmode=64, 
>     cred=0xfffff800118c0100, td=0xfffff80063ba1000) at ./vnode_if.h:254
> #35 0xffffffff80ecb925 in nfs_lookup (ap=0xfffffe085cfa7cf8)
>     at /usr/src/sys/fs/nfsclient/nfs_clvnops.c:1064
> #36 0xffffffff81a52a44 in VOP_LOOKUP_APV (
>     vop=0xffffffff822ff8b8 <newnfs_vnodeops>, a=0xfffffe085cfa7cf8)
>     at vnode_if.c:127
> #37 0xffffffff811c6aad in VOP_LOOKUP (dvp=0xfffff80011d9b588, 
>     vpp=0xfffffe085cfa8708, cnp=0xfffffe085cfa8730) at ./vnode_if.h:54
> #38 0xffffffff811c5b64 in lookup (ndp=0xfffffe085cfa86a8)
>     at /usr/src/sys/kern/vfs_lookup.c:886
> #39 0xffffffff811c4aa2 in namei (ndp=0xfffffe085cfa86a8)
>     at /usr/src/sys/kern/vfs_lookup.c:448
> #40 0xffffffff810050f0 in do_execve (td=0xfffff80063ba1000, 
>     args=0xfffffe085cfa8838, mac_p=0x0) at /usr/src/sys/kern/kern_exec.c:446
> #41 0xffffffff810047fa in kern_execve (td=0xfffff80063ba1000, 
>     args=0xfffffe085cfa8838, mac_p=0x0) at /usr/src/sys/kern/kern_exec.c:347
> #42 0xffffffff810041e2 in sys_execve (td=0xfffff80063ba1000, 
>     uap=0xfffff80063ba1538) at /usr/src/sys/kern/kern_exec.c:221
> #43 0xffffffff817da5ed in syscallenter (td=0xfffff80063ba1000)
>     at /usr/src/sys/amd64/amd64/../../kern/subr_syscall.c:131
> #44 0xffffffff817d9d0b in amd64_syscall (td=0xfffff80063ba1000, traced=0)
>     at /usr/src/sys/amd64/amd64/trap.c:903
> #45 <signal handler called>
> #46 0x0000000800d5285a in ?? ()
> Backtrace stopped: Cannot access memory at address 0x7fffffffe7d8
> (kgdb) 

The double fault is a pretty good indication that you overflowed the
kernel stack.  Having ~40 frames on the stack when the fault happened is
consistent with that.

It looks like you are trying to execute a program from an NFS file
system that is exported by the same host.  This isn't exactly optimal
...

Your best bet for a quick workaround for the stack overflow would be to
rebuild the kernel with a larger value of KSTACK_PAGES.  You can find
teh default in /usr/src/sys/<arch>/conf/NOTES.

It would probably be a good idea to compute the differences in the stack
pointer values between adjacent stack frames to see of any of them are
consuming an excessive amount of stack space.

_______________________________________________
freebsd-stable@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to