I can confirm this. The cursor is blinking along nicely with a 200mS on/off time then with this patch installed:
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 6e92917..7855446 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -402,6 +402,8 @@ static void cursor_timer_handler(unsigned long dev_addr) struct fbcon_ops *ops = info->fbcon_par; queue_work(system_power_efficient_wq, &info->queue); + if (WARN_ON(ops->cur_blink_jiffies < 10)) + ops->cur_blink_jiffies = 200; mod_timer(&ops->cursor_timer, jiffies + ops->cur_blink_jiffies); } @@ -417,6 +419,8 @@ static void fbcon_add_cursor_timer(struct fb_info *info) init_timer(&ops->cursor_timer); ops->cursor_timer.function = cursor_timer_handler; + if (WARN_ON(ops->cur_blink_jiffies < 10)) + ops->cur_blink_jiffies = 200; ops->cursor_timer.expires = jiffies + ops->cur_blink_jiffies; ops->cursor_timer.data = (unsigned long ) info; add_timer(&ops->cursor_timer); @@ -1096,6 +1100,7 @@ static void fbcon_init(struct vc_data *vc, int init) ops = info->fbcon_par; ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms); + WARN_ON(ops->cur_blink_jiffies < 10); p->con_rotate = initial_rotation; set_blitting_type(vc, info); @@ -1310,6 +1315,7 @@ static void fbcon_cursor(struct vc_data *vc, int mode) int c = scr_readw((u16 *) vc->vc_pos); ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms); + WARN_ON(ops->cur_blink_jiffies < 10); if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1) return; ------------------------------------------------------------- I get: [ 29.386066] ------------[ cut here ]------------ [ 29.386080] WARNING: CPU: 0 PID: 1688 at drivers/video/console/fbcon.c:1103 fbcon_init+0x47c/0x4b8 [ 29.386145] Modules linked in: vfat(E) fat(E) aes_ce_blk(E) ablk_helper(E) cryptd(E) aes_ce_cipher(E) ghash_ce(E) sha2_ce(E) sha1_ce(E) sg(E) ip_tables(E) xfs(E) libcrc32c(E) nicvf(E) ast(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) ttm(E) drm(E) i2c_core(E) nicpf(E) thunder_bgx(E) mdio_thunder(E) mdio_cavium(E) dm_mirror(E) dm_region_hash(E) dm_log(E) dm_mod(E) [ 29.386147] [ 29.386153] CPU: 0 PID: 1688 Comm: systemd-logind Tainted: G E 4.6.0-rc3-arm64next+ #278 [ 29.386155] Hardware name: www.cavium.com ThunderX CRB-2S/ThunderX CRB-2S, BIOS 0.3 Apr 25 2016 [ 29.386159] task: fffffe0fd1109d80 ti: fffffe0fd119c000 task.ti: fffffe0fd119c000 [ 29.386163] PC is at fbcon_init+0x47c/0x4b8 [ 29.386167] LR is at fbcon_init+0x144/0x4b8 [ 29.386170] pc : [<fffffc0008468930>] lr : [<fffffc00084685f8>] pstate: 80000145 [ 29.386172] sp : fffffe0fd119f900 [ 29.386179] x29: fffffe0fd119f900 x28: fffffe0fdc059c00 [ 29.386186] x27: fffffe001c7c8800 x26: fffffe0fd6027910 [ 29.386192] x25: fffffe0fd6027800 x24: fffffc0009b56f98 [ 29.386198] x23: 0000000000000001 x22: fffffc0008d1a000 [ 29.386205] x21: fffffe0ff4062000 x20: fffffc0009b57018 [ 29.386211] x19: fffffc0009b56000 x18: 000002aae7bd51f0 [ 29.386222] x17: 000003ffb542f9f0 x16: fffffc0008242b80 [ 29.386228] x15: 000002aae7bdb328 x14: 0000000000000006 [ 29.386234] x13: 000002aae7bc72c8 x12: 0000000000000000 [ 29.386240] x11: 0000000000000000 x10: 0000000000000000 [ 29.386247] x9 : 0000000000000000 x8 : fffffe001c7c8b18 [ 29.386253] x7 : 0000000000000000 x6 : 000000000000007f [ 29.386259] x5 : fffffc0008c91640 x4 : 0000000000000000 [ 29.386266] x3 : 0000000000000800 x2 : fffffe001c7c8ce8 [ 29.386272] x1 : fffffe0fd4df4e00 x0 : 0000000000000000 [ 29.386274] [ 29.386277] ---[ end trace 6c32ddc01008c9ba ]--- [ 29.386280] Call trace: [ 29.386284] Exception stack(0xfffffe0fd119f730 to 0xfffffe0fd119f850) [ 29.386288] f720: fffffc0009b56000 fffffc0009b57018 [ 29.386292] f740: fffffe0fd119f900 fffffc0008468930 0000000080000145 000000000000003d [ 29.386296] f760: fffffc0008c36ce8 fffffc00081c2a30 fffffe0fd119f7f0 fffffc00080f47d8 [ 29.386299] f780: fffffe0fd119f800 fffffc00080f47d8 000000000000010c fffffe0fd1109d80 [ 29.386303] f7a0: 0000000000000000 000000000025810c fffffe0fd110a5a0 0000000000000000 [ 29.386307] f7c0: fffffc000880d2e8 0000000000000000 fffffe0fd119f850 fffffc00080f47d8 [ 29.386311] f7e0: 0000000000000000 fffffe0fd4df4e00 fffffe001c7c8ce8 0000000000000800 [ 29.386314] f800: 0000000000000000 fffffc0008c91640 000000000000007f 0000000000000000 [ 29.386318] f820: fffffe001c7c8b18 0000000000000000 0000000000000000 0000000000000000 [ 29.386321] f840: 0000000000000000 000002aae7bc72c8 [ 29.386327] [<fffffc0008468930>] fbcon_init+0x47c/0x4b8 [ 29.386332] [<fffffc00084d6e3c>] visual_init+0xbc/0x114 [ 29.386336] [<fffffc00084d9938>] vc_allocate+0x108/0x1e0 [ 29.386340] [<fffffc00084d9a44>] con_install+0x34/0x100 [ 29.386347] [<fffffc00084c2f44>] tty_init_dev+0x74/0x1a8 [ 29.386351] [<fffffc00084c3560>] tty_open+0x4e8/0x5a8 [ 29.386357] [<fffffc0008248a78>] chrdev_open+0xa8/0x1ac [ 29.386361] [<fffffc0008241288>] do_dentry_open+0x1e8/0x300 [ 29.386364] [<fffffc0008242694>] vfs_open+0x6c/0x7c [ 29.386370] [<fffffc0008252864>] do_last+0x12c/0xc24 [ 29.386374] [<fffffc00082533d8>] path_openat+0x7c/0x2b8 [ 29.386377] [<fffffc00082546d4>] do_filp_open+0x78/0xd4 [ 29.386381] [<fffffc0008242a74>] do_sys_open+0x150/0x214 [ 29.386384] [<fffffc0008242bbc>] SyS_openat+0x3c/0x48 [ 29.386389] [<fffffc000808538c>] __sys_trace_return+0x0/0x4 . . . [ OK ] Started Command Scheduler. Starting Command Scheduler... [ 33.395141] ------------[ cut here ]------------ [ 33.399773] WARNING: CPU: 0 PID: 0 at drivers/video/console/fbcon.c:405 cursor_timer_handler+0x5c/0x6c [ 33.409072] Modules linked in: ip6t_REJECT(E) nf_reject_ipv6(E) ipt_REJECT(E) nf_reject_ipv4(E) xt_conntrack(E) ebtable_nat(E) ebtable_broute(E) bridge(E) stp(E) llc(E) ebtable_filter(E) ebtables(E) ip6table_nat(E) nf_conntrack_ipv6(E) nf_defrag_ipv6(E) nf_nat_ipv6(E) ip6table_mangle(E) ip6table_security(E) ip6table_raw(E) ip6table_filter(E) ip6_tables(E) iptable_nat(E) nf_conntrack_ipv4(E) nf_defrag_ipv4(E) nf_nat_ipv4(E) nf_nat(E) nf_conntrack(E) iptable_mangle(E) iptable_security(E) iptable_raw(E) iptable_filter(E) vfat(E) fat(E) aes_ce_blk(E) ablk_helper(E) cryptd(E) aes_ce_cipher(E) ghash_ce(E) sha2_ce(E) sha1_ce(E) sg(E) ip_tables(E) xfs(E) libcrc32c(E) nicvf(E) ast(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) ttm(E) drm(E) i2c_core(E) nicpf(E) thunder_bgx(E) mdio_thunder(E) mdio_cavium(E) dm_mirror(E) dm_region_hash(E) dm_log(E) dm_mod(E) [ 33.488864] [ 33.490358] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W E 4.6.0-rc3-arm64next+ #278 [ 33.498877] Hardware name: www.cavium.com ThunderX CRB-2S/ThunderX CRB-2S, BIOS 0.3 Apr 25 2016 [ 33.507571] task: fffffc0008c79c80 ti: fffffc0008c50000 task.ti: fffffc0008c50000 [ 33.515053] PC is at cursor_timer_handler+0x5c/0x6c [ 33.519930] LR is at cursor_timer_handler+0x30/0x6c [ 33.524804] pc : [<fffffc0008467490>] lr : [<fffffc0008467464>] pstate: 80000145 [ 33.532198] sp : ffffff0ff9dc3da0 [ 33.535513] x29: ffffff0ff9dc3da0 x28: fffffc0008c70be8 [ 33.540836] x27: fffffc0008c70000 x26: fffffc0008d68fb3 [ 33.546158] x25: fffffc0008c70000 x24: fffffc0008d6a000 [ 33.551483] x23: fffffe0fd6027800 x22: fffffc0008467434 [ 33.556809] x21: 0000000000000101 x20: fffffe0fd6027800 [ 33.562138] x19: fffffe0fdc059c00 x18: 000002aadcf70050 [ 33.567464] x17: 000003ff95cfa228 x16: fffffc000829628c [ 33.572786] x15: 00000000000000dd x14: ffffffc2c3bfa000 [ 33.578114] x13: 003d093352dc0000 x12: 00000000003d0900 [ 33.583440] x11: 0000000000000000 x10: 00003d09334ec400 [ 33.588765] x9 : fffffc0008c50000 x8 : fffffc0008851320 [ 33.594090] x7 : 00000000ad583ee9 x6 : 0000000000000001 [ 33.599412] x5 : fffffc0008c79c80 x4 : fffffc0009012c28 [ 33.604743] x3 : 0000000000000004 x2 : 0000000000000002 [ 33.610074] x1 : 0000000000000000 x0 : 0000000000000001 [ 33.615395] [ 33.616886] ---[ end trace 6c32ddc01008c9c3 ]--- [ 33.621506] Call trace: [ 33.623951] Exception stack(0xffffff0ff9dc3bd0 to 0xffffff0ff9dc3cf0) [ 33.630390] 3bc0: fffffe0fdc059c00 fffffe0fd6027800 [ 33.638221] 3be0: ffffff0ff9dc3da0 fffffc0008467490 0000000080000145 000000000000003d [ 33.646050] 3c00: fffffe0fd46b5700 ffffff0ff9dd8f00 fffffc0008d69de0 0000000000000000 [ 33.653882] 3c20: fffffc0008c36ce8 fffffc0008c24b28 fffffc0008c70be8 fffffc0008da5b48 [ 33.661708] 3c40: ffffff0ff9dc3cb0 fffffc0008814614 ffffff0ff9dd8f00 fffffc0008d68ea4 [ 33.669535] 3c60: fffffc0008d69de0 0000000000000000 fffffc0008c36ce8 fffffc0008c24b28 [ 33.677361] 3c80: 0000000000000001 0000000000000000 0000000000000002 0000000000000004 [ 33.685191] 3ca0: fffffc0009012c28 fffffc0008c79c80 0000000000000001 00000000ad583ee9 [ 33.693017] 3cc0: fffffc0008851320 fffffc0008c50000 00003d09334ec400 0000000000000000 [ 33.700845] 3ce0: 00000000003d0900 003d093352dc0000 [ 33.705726] [<fffffc0008467490>] cursor_timer_handler+0x5c/0x6c [ 33.711648] [<fffffc0008117bd8>] call_timer_fn+0xc0/0x3e4 [ 33.717047] [<fffffc0008118114>] run_timer_softirq+0x1ec/0x39c [ 33.722879] [<fffffc0008081794>] __do_softirq+0x104/0x598 [ 33.728278] [<fffffc00080a2ca4>] irq_exit+0x9c/0xc0 [ 33.733155] [<fffffc0008102384>] __handle_domain_irq+0x98/0xfc [ 33.739009] [<fffffc0008081594>] gic_handle_irq+0x94/0x190 [ 33.744492] Exception stack(0xffffff0ff9dc3fc0 to 0xffffff0ff9dc40e0) [ 33.750930] 3fc0: fffffc0008c53da0 fffffc0008d6a000 fffffc0008c53ec0 fffffc00080860c0 [ 33.758756] 3fe0: 0000000060000145 fffffc0008a33c98 ffffff0ff9dc0020 0000000000000000 [ 33.766582] 4000: fffffc0008c53ec0 fffffc0008c53da0 0000000000000000 0000000000000000 [ 33.774408] 4020: fffffe0fee5412d0 0000000000000000 0000000000000000 0000000000000000 [ 33.782234] 4040: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 33.790060] 4060: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 33.797887] 4080: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 33.805715] 40a0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 33.813547] 40c0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 33.821374] [<fffffc0008084de4>] el1_irq+0xa4/0x114 [ 33.826251] [<fffffc00080860c0>] arch_cpu_idle+0x40/0x1fc [ 33.831651] [<fffffc00080e9970>] default_idle_call+0x20/0x34 [ 33.837310] [<fffffc00080e9ae4>] cpu_startup_entry+0x160/0x314 [ 33.843142] [<fffffc000880d968>] rest_init+0x150/0x160 [ 33.848283] [<fffffc0008b70b90>] start_kernel+0x3b8/0x3cc [ 33.853681] [<fffffc0008b701c4>] __primary_switched+0x30/0x6c On 05/17/2016 07:29 AM, Peter Hurley wrote: > [ +to Scot Doyle ] > > Scot, please take a look at this soft lockup. > > Regards, > Peter Hurley > > > Hi Ming, > > On 05/17/2016 02:12 AM, Ming Lei wrote: >> Hi, >> >> On Tue, May 17, 2016 at 4:07 AM, Dann Frazier >> <dann.frazier at canonical.com> wrote: >>> Hi, >>> I'm observing a soft lockup issue w/ the ASPEED controller on an >>> arm64 server platform. This was originally seen on Ubuntu's 4.4 >>> kernel, but it is reproducible w/ vanilla 4.6-rc7 as well. >>> >>> [ 32.792656] NMI watchdog: BUG: soft lockup - CPU#38 stuck for 22s! >>> [swapper/38:0] >>> >>> I observe this just once each time I boot into debian-installer (I'm >>> using a serial console, but the ast module gets loaded during >>> startup). >> >> I have figured out that it is caused by 'mod_timer(timer, jiffies)' and >> 'ops->cur_blink_jiffies' is observed as zero in cursor_timer_handler() >> when the issue happened. > > Thanks for tracking this down. > > This softlockup looks to be caused by: > > commit 27a4c827c34ac4256a190cc9d24607f953c1c459 > Author: Scot Doyle <lkml14 at scotdoyle.com> > Date: Thu Mar 26 13:56:38 2015 +0000 > > fbcon: use the cursor blink interval provided by vt > > vt now provides a cursor blink interval via vc_data. Use this > interval instead of the currently hardcoded 200 msecs. Store it in > fbcon_ops to avoid locking the console in cursor_timer_handler(). > > Signed-off-by: Scot Doyle <lkml14 at scotdoyle.com> > Acked-by: Pavel Machek <pavel at ucw.cz> > Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org> > > and > > commit bd63364caa8df38bad2b25b11b2a1b849475cce5 > Author: Scot Doyle <lkml14 at scotdoyle.com> > Date: Thu Mar 26 13:54:39 2015 +0000 > > vt: add cursor blink interval escape sequence > > Add an escape sequence to specify the current console's cursor blink > interval. The interval is specified as a number of milliseconds > until > the next cursor display state toggle, from 50 to 65535. > /proc/loadavg > did not show a difference with a one msec interval, but the lower > bound is set to 50 msecs since slower hardware wasn't tested. > > Store the interval in the vc_data structure for later access by > fbcon, > initializing the value to fbcon's current hardcoded value of 200 > msecs. > > Signed-off-by: Scot Doyle <lkml14 at scotdoyle.com> > Acked-by: Pavel Machek <pavel at ucw.cz> > Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org> > > > >> Looks it is a real fbcon/vt issue, see following: >> >> fbcon_init() >> <-.con_init >> <-visual_init() >> >> reset_terminal() >> <-vc_init() >> >> vc->vc_cur_blink_ms is just set in reset_terminal() from vc_init() path, >> and ops->cur_blink_jiffies is figured out from vc->vc_cur_blink_ms >> in fbcon_init(). >> >> And visual_init() is always run before vc_init(), so ops->cur_blink_jiffies >> is initialized as zero and cause the soft lockup issue finally. >> >> Thanks, >> Ming >> >>> >>> perf shows that the CPU caught by the NMI is typically in code >>> updating the cursor timer: >>> >>> - 16.92% swapper [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore >>> - _raw_spin_unlock_irqrestore >>> + 16.87% mod_timer >>> + 0.05% cursor_timer_handler >>> - 12.15% swapper [kernel.kallsyms] [k] queue_work_on >>> - queue_work_on >>> + 12.00% cursor_timer_handler >>> + 0.15% call_timer_fn >>> + 10.98% swapper [kernel.kallsyms] [k] run_timer_softirq >>> - 2.23% swapper [kernel.kallsyms] [k] mod_timer >>> - mod_timer >>> + 1.97% cursor_timer_handler >>> + 0.26% call_timer_fn >>> >>> During the same period, I can see that another CPU is actively >>> executing the timer function: >>> >>> - 42.18% kworker/u96:2 [kernel.kallsyms] [k] ww_mutex_unlock >>> - ww_mutex_unlock >>> - 40.70% ast_dirty_update >>> ast_imageblit >>> soft_cursor >>> bit_cursor >>> fb_flashcursor >>> process_one_work >>> worker_thread >>> kthread >>> ret_from_fork >>> + 1.48% ast_imageblit >>> - 40.15% kworker/u96:2 [kernel.kallsyms] [k] __memcpy_toio >>> - __memcpy_toio >>> + 31.54% ast_dirty_update >>> + 8.61% ast_imageblit >>> >>> Using the graph function tracer on fb_flashcursor(), I see that >>> ast_dirty_update usually takes around 60 us, in which it makes 16 >>> calls to __memcpy_toio(). However, there is always one instance on >>> every boot of the installer where ast_dirty_update() takes ~98 *ms* to >>> complete, during which it makes 743 calls to __memcpy_toio(). While >>> that doesn't directly account for the full 22s, I do wonder if that >>> maybe a smoking gun. >>> >>> fyi, this is being tracked at: https://bugs.launchpad.net/bugs/1574814 >>> >>> -dann >