----- Original Message -----
From: "Andriy Gapon" <a...@freebsd.org>
To: "Steven Hartland" <kill...@multiplay.co.uk>
Cc: <freebsd-stable@FreeBSD.org>
Sent: Wednesday, August 17, 2011 1:56 PM
Subject: Re: debugging frequent kernel panics on 8.2-RELEASE


on 17/08/2011 15:15 Steven Hartland said the following:
define allpcpu
set $i = 0
while ($i <= mp_maxid)
p *cpuid_to_pcpu[$i]
set $i = $i + 1
end
end
allpcpu

Here's the output.
[snip]
$3 = {pc_curthread = 0xffffff06b7f9c000, pc_idlethread = 0xffffff0012d85460,
pc_fpcurthread = 0x0, pc_deadthread = 0x0, pc_curpcb = 0xffffff8d8f35ad00,
pc_switchtime = 564139963042291, pc_switchticks = 247796550, pc_cpuid = 2,
 pc_cpumask = 4, pc_other_cpus = 16777211, pc_allcpu = {sle_next =
0xffffffff808af680}, pc_spinlocks = 0x0, pc_cnt = {v_swtch = 1005391948,
v_trap = 95927887, v_syscall = 2033274537, v_intr = 137253, v_soft = 151981308,
   v_vm_faults = 14199910, v_cow_faults = 1468132, v_cow_optim = 533, v_zfod =
11032593, v_ozfod = 0, v_swapin = 0, v_swapout = 0, v_swappgsin = 0,
v_swappgsout = 0, v_vnodein = 17238, v_vnodeout = 48, v_vnodepgsin = 17238,
   v_vnodepgsout = 378, v_intrans = 6753, v_reactivated = 0, v_pdwakeups = 0,
v_pdpages = 0, v_tcached = 0, v_dfree = 0, v_pfree = 0, v_tfree = 15435380,
v_page_size = 0, v_page_count = 0, v_free_reserved = 0,
   v_free_target = 0, v_free_min = 0, v_free_count = 0, v_wire_count = 0,
v_active_count = 0, v_inactive_target = 0, v_inactive_count = 0, v_cache_count =
0, v_cache_min = 0, v_cache_max = 0, v_pageout_free_min = 0,
   v_interrupt_free_min = 0, v_free_severe = 0, v_forks = 24041, v_vforks = 16857,
v_rforks = 0, v_kthreads = 0, v_forkpages = 6281292, v_vforkpages = 3606842,
v_rforkpages = 0, v_kthreadpages = 0}, pc_cp_time = {8629094,
   693, 594838, 24425, 23707811}, pc_device = 0xffffff0012da2500, pc_netisr = 0x0,
pc_rm_queue = {rmq_next = 0xffffffff808afa50, rmq_prev = 0xffffffff808afa50},
pc_dynamic = 18446743526093326592,
 pc_monitorbuf = '\0' <repeats 127 times>, pc_prvspace = 0xffffffff808af900,
pc_curpmap = 0xffffffff8083ea50, pc_tssp = 0xffffffff808ae7d0, pc_commontssp =
0xffffffff808ae7d0, pc_rsp0 = -491518579456,
 pc_scratch_rsp = 140737488347240, pc_apic_id = 2, pc_acpi_id = 2, pc_fs32p =
0xffffffff808ad600, pc_gs32p = 0xffffffff808ad608, pc_ldt = 0xffffffff808ad648,
pc_tss = 0xffffffff808ad638, pc_cmci_mask = 8}
[snip]

Thank you.
A few more questions:
1. more kgdb info for the core:
p *(cpuid_to_pcpu[2]->pc_curthread)
p *(cpuid_to_pcpu[2]->pc_curthread->td_proc)
p *(cpuid_to_pcpu[2]->pc_curthread->td_proc->p_limit)


(kgdb) p *(cpuid_to_pcpu[2]->pc_curthread)
$1 = {td_lock = 0xffffffff8084a440, td_proc = 0xffffff070b5a48c0, td_plist = {tqe_next = 0x0, tqe_prev = 0xffffff070b5a48d0}, td_runq = {tqe_next = 0x0, tqe_prev = 0xffffffff8084a688}, td_slpq = {tqe_next = 0x0, tqe_prev = 0xffffff0296460900}, td_lockq = {tqe_next = 0x0, tqe_prev = 0xffffff8d8fb5c8b0}, td_cpuset = 0xffffff0012d65dc8, td_sel = 0xffffff0a1b76c700, td_sleepqueue = 0xffffff0296460900, td_turnstile = 0xffffff05f31d8000, td_umtxq = 0xffffff05513d9780, td_tid = 102057, td_sigqueue = {sq_signals = {__bits = {0, 0, 0, 0}}, sq_kill = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0, tqh_last = 0xffffff06b7f9c0a0}, sq_proc = 0xffffff070b5a48c0, sq_flags = 1}, td_flags = 6, td_inhibitors = 0, td_pflags = 0, td_dupfd = 0, td_sqqueue = 0, td_wchan = 0x0, td_wmesg = 0x0, td_lastcpu = 2 '\002', td_oncpu = 2 '\002', td_owepreempt = 0 '\0', td_tsqueue = 0 '\0', td_locks = 998, td_rw_rlocks = 0, td_lk_slocks = 0, td_blocked = 0x0, td_lockname = 0x0, td_contested = {lh_first = 0x0}, td_sleeplocks = 0x0, td_intr_nesting_level = 0, td_pinned = 1, td_ucred = 0xffffff0551cf9900, td_estcpu = 0, td_slptick = 0, td_blktick = 0, td_ru = {ru_utime = {tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0}, ru_maxrss = 2068, ru_ixrss = 5280, ru_idrss = 19296, ru_isrss = 6144, ru_minflt = 5015, ru_majflt = 0, ru_nswap = 0, ru_inblock = 0, ru_oublock = 0, ru_msgsnd = 241, ru_msgrcv = 2076, ru_nsignals = 1, ru_nvcsw = 2264, ru_nivcsw = 159}, td_incruntime = 4257692, td_runtime = 487523210, td_pticks = 0, td_sticks = 0, td_iticks = 0, td_uticks = 0, td_intrval = 4, td_oldsigmask = {__bits = {0, 0, 0, 0}}, td_sigmask = {__bits = {16384, 0, 0, 0}}, td_generation = 2423, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags = 4}, td_xsig = 0, td_profil_addr = 0, td_profil_ticks = 0, td_name = "httpd", '\0' <repeats 14 times>, td_fpop = 0x0, td_dbgflags = 0, td_dbgksi = { ksi_link = {tqe_next = 0x0, tqe_prev = 0x0}, ksi_info = {si_signo = 0, si_errno = 0, si_code = 0, si_pid = 0, 
si_uid = 0, si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, sival_ptr = 0x0, sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, _timer = {_timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {_band = 0}, __spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}}, ksi_flags = 0, ksi_sigq = 0x0}, td_ng_outbound = 0, td_osd = {osd_nslots = 0, osd_slots = 0x0, osd_next = {le_next = 0x0, le_prev = 0x0}}, td_rqindex = 32 ' ', td_base_pri = 128 '\200', td_priority = 128 '\200', td_pri_class = 3 '\003', td_user_pri = 128 '\200', td_base_user_pri = 128 '\200', td_pcb = 0xffffff8d8f35ad00, td_state = TDS_RUNNING, td_retval = {0, 8}, td_slpcallout = {c_links = {sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0, tqe_prev = 0xffffff800088ce00}}, c_time = 247622368, c_arg = 0xffffff06b7f9c000, c_func = 0xffffffff803c4bd0 <sleepq_timeout>, c_lock = 0x0, c_flags = 16, c_cpu = 13}, td_frame = 0xffffff8d8f35ac40, td_kstack_obj = 0xffffff0a51ee5e58, td_kstack = 18446743582190956544, td_kstack_pages = 4, td_unused1 = 0x0, td_unused2 = 0, td_unused3 = 0, td_critnest = 0, td_md = {md_spinlock_count = 0, md_saved_flags = 70}, td_sched = 0xffffff06b7f9c428, td_ar = 0x0, td_syscalls = 129862, td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}}, td_dtrace = 0x0, td_errno = 0, td_vnet = 0x0, td_vnet_lpush = 0x0, td_rux = {rux_runtime = 483265518,
   rux_uticks = 7, rux_sticks = 17, rux_iticks = 0, rux_uu = 0, rux_su = 0, rux_tu = 0}, td_map_def_user = 0x0}
(kgdb) p *(cpuid_to_pcpu[2]->pc_curthread->td_proc)
$2 = {p_list = {le_next = 0xffffff0653ff78c0, le_prev = 0xffffffff80841b48}, p_threads = {tqh_first = 0xffffff06b7f9c000, tqh_last = 0xffffff06b7f9c010}, p_slock = {lock_object = {lo_name = 0xffffffff806323c0 "process slock", lo_flags = 720896, lo_data = 0, lo_witness = 0x0}, mtx_lock = 4}, p_ucred = 0xffffff0551cf9900, p_fd = 0x0, p_fdtol = 0x0, p_stats = 0xffffff04ea565600, p_limit = 0x0, p_limco = {c_links = {sle = {sle_next = 0x0}, tqe = { tqe_next = 0x0, tqe_prev = 0x0}}, c_time = 0, c_arg = 0x0, c_func = 0, c_lock = 0xffffff070b5a49b8, c_flags = 0, c_cpu = 0}, p_sigacts = 0xffffff0a663a1000, p_flag = 268443904, p_state = PRS_NORMAL, p_pid = 78097, p_hash = {le_next = 0x0, le_prev = 0xffffff800021c888}, p_pglist = {le_next = 0xffffff00285c5460, le_prev = 0xffffff0afa9b8988}, p_pptr = 0xffffff0afa9b88c0, p_sibling = {le_next = 0xffffff00285c5460, le_prev = 0xffffff0afa9b89b0}, p_children = {lh_first = 0x0}, p_mtx = {lock_object = {lo_name = 0xffffffff806323b3 "process lock", lo_flags = 21168128, lo_data = 10, lo_witness = 0x0}, mtx_lock = 18446743003054325761}, p_ksi = 0xffffff0016738bd0, p_sigqueue = {sq_signals = {__bits = {16384, 0, 0, 0}}, sq_kill = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0xffffff033829d070, tqh_last = 0xffffff033829d070}, sq_proc = 0xffffff070b5a48c0, sq_flags = 1}, p_oppid = 0, p_vmspace = 0xffffffff8083e920, p_swtick = 89392056, p_realtimer = {it_interval = {tv_sec = 0, tv_usec = 0}, it_value = {tv_sec = 0, tv_usec = 0}}, p_ru = {ru_utime = {tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0}, ru_maxrss = 0, ru_ixrss = 0, ru_idrss = 0, ru_isrss = 0, ru_minflt = 0, ru_majflt = 0, ru_nswap = 0, ru_inblock = 0, ru_oublock = 0, ru_msgsnd = 0, ru_msgrcv = 0, ru_nsignals = 0, ru_nvcsw = 0, ru_nivcsw = 0}, p_rux = {rux_runtime = 483265518, rux_uticks = 7, rux_sticks = 17, rux_iticks = 0, rux_uu = 61934, rux_su = 150412, rux_tu = 212347}, p_crux = {rux_runtime = 80058539464, rux_uticks = 2914, rux_sticks = 1778, rux_iticks = 
0, rux_uu = 21847439, rux_su = 13330387, rux_tu = 35177827}, p_profthreads = 0, p_exitthreads = 0, p_traceflag = 0, p_tracevp = 0x0, p_tracecred = 0x0, p_textvp = 0x0, p_lock = 11, p_sigiolst = {slh_first = 0x0}, p_sigparent = 20, p_sig = 0, p_code = 0, p_stops = 0, p_stype = 0, p_step = 0 '\0', p_pfsflags = 0 '\0', p_nlminfo = 0x0, p_aioinfo = 0x0, p_singlethread = 0x0, p_suspcount = 0, p_xthread = 0xffffff06b7f9c000, p_boundary_count = 0, p_pendingcnt = 1, p_itimers = 0x0, p_magic = 3203398350, p_osrel = 802000, p_comm = "httpd", '\0' <repeats 14 times>, p_pgrp = 0xffffff05f3928080, p_sysent = 0xffffffff807fe180, p_args = 0xffffff0a8ad5e600, p_cpulimit = 9223372036854775807, p_nice = 0 '\0', p_fibnum = 0, p_xstat = 0, p_klist = {kl_list = {slh_first = 0x0}, kl_lock = 0xffffffff803586e0 <knlist_mtx_lock>, kl_unlock = 0xffffffff803586b0 <knlist_mtx_unlock>, kl_assert_locked = 0xffffffff80355380 <knlist_mtx_assert_locked>, kl_assert_unlocked = 0xffffffff80355390 <knlist_mtx_assert_unlocked>, kl_lockarg = 0xffffff070b5a49b8}, p_numthreads = 1, p_md = {md_ldt = 0x0, md_ldt_sd = {sd_lolimit = 0, sd_lobase = 0, sd_type = 0, sd_dpl = 0, sd_p = 0, sd_hilimit = 0, sd_xx0 = 0, sd_gran = 0, sd_hibase = 0, sd_xx1 = 0, sd_mbz = 0, sd_xx2 = 0}}, p_itcallout = {c_links = {sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0, tqe_prev = 0x0}}, c_time = 0, c_arg = 0x0, c_func = 0, c_lock = 0x0, c_flags = 16, c_cpu = 0}, p_acflag = 1, p_peers = 0x0, p_leader = 0xffffff070b5a48c0, p_emuldata = 0x0, p_label = 0x0, p_sched = 0xffffff070b5a4d20, p_ktr = {stqh_first = 0x0, stqh_last = 0xffffff070b5a4cf0}, p_mqnotifier = {lh_first = 0x0}, p_dtrace = 0x0, p_pwait = {cv_description = 0xffffffff80632b87 "ppwait", cv_waiters = 0}}
(kgdb) p *(cpuid_to_pcpu[2]->pc_curthread->td_proc->p_limit)
Cannot access memory at address 0x0

2. do you have any additional patches in your source tree besides those
debugging patches that I provided to you?

Yes, in this build we have:-
1. tcp_reass.c-logdebug+missingsegment-20110811-lstewart.patch (fixes tcp stalling)
http://people.freebsd.org/~lstewart/patches/misctcp/tcp_reass.c-logdebug%2bmissingsegment-20110811-lstewart.diff
2. libz.patch (disables assembly optimisations in libz as it causes application crashes)
3. udp6_usrreq.c.patch (fixes ipv4 on ipv6 sockets)
http://svnweb.freebsd.org/base/head/sys/netinet6/udp6_usrreq.c?r1=220463&r2=220462&pathrev=220463
4. cam-timeout-fix.patch (fixes overflow in cam timeouts)
http://codelabs.ru/fbsd/patches/cam/CAM-properly-convert-timeout-to-ticks.diff
5. ixgbe.c.patch & ixgbe.h.patch (fixes ifconfig disconnecting link)
6. stop_scheduler_on_panic.8.x.patch (your first patch)
7. panic-info.patch (your second patch)

The only patches of these present when we initially noticed the problem
were #2, #3 & #5 (but these machines are not using this driver)

3. do you have any thirdparty/out-of-tree kernel modules?
Nope, our kernel is compiled with a load of drivers disabled and then the following:-

device ahci

makeoptions MODULES_OVERRIDE="linux linprocfs acpi nullfs unionfs accf_http if_lagg opensolaris zfs ipmi i2c"
options     COMPAT_LINUX32
options     DEVICE_POLLING

N.B. although device polling is compiled in, it's not used on any of these machines.

4. could you please send me your kernel config?
See direct email, as not sure it will go to the list.

   Regards
Steve

================================================
This e.mail is private and confidential between Multiplay (UK) Ltd. and the person or entity to whom it is addressed. In the event of misdirection, the recipient is prohibited from using, copying, printing or otherwise disseminating it or any information contained in it.
In the event of misdirection, illegible or incomplete transmission please 
telephone +44 845 868 1337
or return the E.mail to postmas...@multiplay.co.uk.

_______________________________________________
freebsd-stable@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to