Here's some debugging output. I installed linux-image-2.6.32-5-amd64-dbg and linux-source-2.6.32 and then used
sudo gdbserver-xen 127.0.0.1:9999 --attach 37 where 37 is the domid printed by xm list and sudo gdb /local/lindi/vmlinux-2.6.32-5-amd64 (gdb) target remote 127.0.0.1:9999 on the opensuse dom0 to attach gdb to linux. It seems that Linux (-17) is stuck trying to print an OOPS: (gdb) info threads * 1 Remote target 0xffffffff812f8c78 in __ticket_spin_lock ( lock=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114 (gdb) bt full #0 0xffffffff812f8c78 in __ticket_spin_lock (lock=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:114 inc = 1 tmp = 0 #1 __raw_spin_lock (lock=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/include/asm/spinlock.h:190 No locals. #2 __spin_lock (lock=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/include/linux/spinlock_api_smp.h:337 No locals. #3 _spin_lock (lock=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/spinlock.c:138 No locals. #4 0xffffffff8104d344 in vprintk ( fmt=0xffffffff813a8798 "<1>BUG: unable to handle kernel ", args=0xffff88003fdb93a8) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:705 printed_len = 0 current_log_level = 4 flags = 0 p = <value optimized out> #5 0xffffffff812f6eec in printk (fmt=0xffffffff815d13c0 "") at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/kernel/printk.c:595 ---Type <return> to continue, or q <return> to quit--- args = {{gp_offset = 8, fp_offset = 48, overflow_arg_area = 0xffff88003fdb9488, reg_save_area = 0xffff88003fdb93c8}} r = 1 #6 0xffffffff810311b0 in show_fault_oops (address=<value optimized out>, error_code=<value optimized out>, regs=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:583 No locals. #7 no_context (address=<value optimized out>, error_code=<value optimized out>, regs=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658 tsk = 0xffff88003fdb4000 flags = 0 sig = <value optimized out> #8 0xffffffff81031429 in __bad_area_nosemaphore (regs=0xffff88003fdb9598, error_code=<value optimized out>, address=18446744071582043416, si_code=196609) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:741 tsk = 0xffff88003fdb4000 #9 0xffffffff812f9015 in ?? () at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/entry_64.S:1383 No locals. #10 0x0005398d646e415d in ?? () No symbol table info available. #11 0x00000000d5de35ab in ?? () ---Type <return> to continue, or q <return> to quit--- No symbol table info available. #12 0x0000000000000002 in ?? () No symbol table info available. #13 0x000c82198774ef00 in ?? () No symbol table info available. #14 0x0000000000000001 in ?? () No symbol table info available. #15 0xffff8800032e6040 in ?? () No symbol table info available. #16 0x0000000000000202 in ?? () No symbol table info available. #17 0xffff880000000000 in ?? () No symbol table info available. #18 0xffffffff813a87b8 in ?? () __kcrctab_unregister_net_sysctl_table = 2623496682 __kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table" net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0, prev = 0xffffffff814a6350}, default_set = {list = { next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808}, parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0, permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>} sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0, prev = 0xffffffff814a6140}, init = 0xffffffff812e4323 <sysctl_net_init>, exit = 0xffffffff812e4375 <sysctl_net_exit>} __ksymtab_register_net_sysctl_table = {value = 18446744071581877167, name = 0xffffffff81422f46 "register_net_sysctl_table"} __kstrtab_register_net_sysctl_table = "register_net_sysctl_table" __initcall_sysctl_init4 = 0 ---Type <return> to continue, or q <return> to quit--- __kcrctab_register_net_sysctl_table = 829491708 __ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142, name = 0xffffffff81422f2a "register_net_sysctl_rotable"} __ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137, name = 0xffffffff81422f0e "unregister_net_sysctl_table"} net_sysctl_root = {root_list = {next = 0xffffffff814a6390, prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0, prev = 0x0}, parent = 0x0, is_seen = 0}, lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>, permissions = 0xffffffff812e4342 <net_ctl_permissions>} __kcrctab_register_net_sysctl_rotable = 2528268687 __kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable" #19 0x0000000000000020 in ?? () No symbol table info available. #20 0x0005398d90ee1210 in ?? () No symbol table info available. #21 0x00000000f2572b01 in ?? () No symbol table info available. #22 0x0005398d9113c2e5 in ?? () No symbol table info available. #23 0xffffffff813a87b8 in ?? () __kcrctab_unregister_net_sysctl_table = 2623496682 __kstrtab_unregister_net_sysctl_table = "unregister_net_sysctl_table" net_sysctl_ro_root = {root_list = {next = 0xffffffff814649b0, prev = 0xffffffff814a6350}, default_set = {list = { next = 0xffff88003fe34c08, prev = 0xffff88003d8dd808}, parent = 0xffffffff814649c0, is_seen = 0}, lookup = 0, permissions = 0xffffffff812e4313 <net_ctl_ro_header_perms>} sysctl_pernet_ops = {list = {next = 0xffffffff8149bcc0, ---Type <return> to continue, or q <return> to quit--- prev = 0xffffffff814a6140}, init = 0xffffffff812e4323 <sysctl_net_init>, exit = 0xffffffff812e4375 <sysctl_net_exit>} __ksymtab_register_net_sysctl_table = {value = 18446744071581877167, name = 0xffffffff81422f46 "register_net_sysctl_table"} __kstrtab_register_net_sysctl_table = "register_net_sysctl_table" __initcall_sysctl_init4 = 0 __kcrctab_register_net_sysctl_table = 829491708 __ksymtab_register_net_sysctl_rotable = {value = 18446744071581877142, name = 0xffffffff81422f2a "register_net_sysctl_rotable"} __ksymtab_unregister_net_sysctl_table = {value = 18446744071581877137, name = 0xffffffff81422f0e "unregister_net_sysctl_table"} net_sysctl_root = {root_list = {next = 0xffffffff814a6390, prev = 0xffffffff814649b0}, default_set = {list = {next = 0x0, prev = 0x0}, parent = 0x0, is_seen = 0}, lookup = 0xffffffff812e42e8 <net_ctl_header_lookup>, permissions = 0xffffffff812e4342 <net_ctl_permissions>} __kcrctab_register_net_sysctl_rotable = 2528268687 __kstrtab_register_net_sysctl_rotable = "register_net_sysctl_rotable" #24 0xffff8800032e6040 in ?? () No symbol table info available. #25 0xffffffffffffffff in ?? () No symbol table info available. #26 0xffffffff8102bddf in pvclock_clocksource_read (src=0x1) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/kernel/pvclock.c:145 ret = 0 offset = <value optimized out> last = 1 ---Type <return> to continue, or q <return> to quit--- #27 0x0000000000000000 in ?? () No symbol table info available. We probably should figure out what caused the OOPS so the following information about printk being stuck is probably not very relevant: (gdb) x/10i $rip 0xffffffff812f8c78 <__ticket_spin_lock+21>: movzwl (%rdi),%edx 0xffffffff812f8c7b <__ticket_spin_lock+24>: jmp 0xffffffff812f8c72 <__ticket_spin_lock+15> 0xffffffff812f8c7d <_spin_lock+26>: retq 0xffffffff812f8c7e <__raw_write_lock>: subl $0x1000000,%ds:(%rdi) 0xffffffff812f8c85 <__raw_write_lock+7>: je 0xffffffff812f8c8c <_write_lock+14> 0xffffffff812f8c87 <__raw_write_lock+9>: callq 0xffffffff81193b80 0xffffffff812f8c8c <_write_lock+14>: retq 0xffffffff812f8c8d <__ticket_spin_unlock>: incw (%rdi) 0xffffffff812f8c90 <raw_local_irq_restore>: mov %rsi,%rdi 0xffffffff812f8c93 <raw_local_irq_restore+3>: callq *0xffffffff814629b8 (gdb) si 0xffffffff812f8c7b 114 asm volatile(LOCK_PREFIX "xaddl %0, %1\n" (gdb) si 0xffffffff812f8c72 114 asm volatile(LOCK_PREFIX "xaddl %0, %1\n" (gdb) x/10i $rip 0xffffffff812f8c72 <__ticket_spin_lock+15>: cmp %eax,%edx 0xffffffff812f8c74 <__ticket_spin_lock+17>: je 0xffffffff812f8c7d <_spin_lock+26> 0xffffffff812f8c76 <__ticket_spin_lock+19>: pause 0xffffffff812f8c78 <__ticket_spin_lock+21>: movzwl (%rdi),%edx 0xffffffff812f8c7b <__ticket_spin_lock+24>: jmp 0xffffffff812f8c72 <__ticket_spin_lock+15> 0xffffffff812f8c7d <_spin_lock+26>: retq 0xffffffff812f8c7e <__raw_write_lock>: subl $0x1000000,%ds:(%rdi) 0xffffffff812f8c85 <__raw_write_lock+7>: je 0xffffffff812f8c8c <_write_lock+14> 0xffffffff812f8c87 <__raw_write_lock+9>: callq 0xffffffff81193b80 0xffffffff812f8c8c <_write_lock+14>: retq (gdb) info register rax 0x1 1 rbx 0x0 0 rcx 0xfffedb34 4294892340 rdx 0x0 0 rsi 0xffff88003fdb93a8 -131940323978328 rdi 0xffffffff815d13c0 -2124606528 rbp 0xffff88003fdb93a8 0xffff88003fdb93a8 rsp 0xffff88003fdb92f0 0xffff88003fdb92f0 r8 0x2 2 r9 0xffff88003fdb4000 -131940323999744 r10 0xffff880000000000 -131941395333120 r11 0x202 514 r12 0xffffffff813a8798 -2126870632 r13 0x4 4 r14 0x3 3 r15 0x0 0 rip 0xffffffff812f8c72 0xffffffff812f8c72 <__ticket_spin_lock+15> eflags 0x1397 [ CF PF AF SF TF IF #12 ] cs 0xe033 57395 ss 0xe02b 57387 ds 0x0 0 es 0x0 0 fs 0x0 0 gs 0x0 0 fctrl 0x37f 895 fstat 0x0 0 ftag 0xffff 65535 fiseg 0x0 0 ---Type <return> to continue, or q <return> to quit--- fioff 0x0 0 foseg 0x0 0 fooff 0x0 0 fop 0x0 0 mxcsr 0x1f80 [ IM DM ZM OM UM PM ] (gdb) However, it seems that the OOPS is caused by a write (error_code==3) to variable called "last_value" that is in read-only memory (.rodata): (gdb) frame 7 #7 no_context (address=<value optimized out>, error_code=<value optimized out>, regs=<value optimized out>) at /build/buildd-linux-2.6_2.6.32-17-amd64-FO3n8v/linux-2.6-2.6.32/debian/build/source_amd64_none/arch/x86/mm/fault.c:658 658 show_fault_oops(regs, error_code, address); (gdb) p/x *regs $10 = {r15 = 0x5398d646e415d, r14 = 0xd5de35ab, r13 = 0x2, r12 = 0xc82198774ef00, bp = 0x1, bx = 0xffff8800032e6040, r11 = 0x202, r10 = 0xffff880000000000, r9 = 0xffffffff813a87b8, r8 = 0x20, ax = 0x5398d90ee1210, cx = 0xf2572b01, dx = 0x5398d9113c2e5, si = 0xffffffff813a87b8, di = 0xffff8800032e6040, orig_ax = 0xffffffffffffffff, ip = 0xffffffff8102bddf, cs = 0x10000e030, flags = 0x10002, sp = 0xffff88003fdb9640, ss = 0xe02b} (gdb) p error_code $11 = 3 (gdb) p/x address $22 = 0xffffffff8130cd18 (gdb) x/6i regs->ip 0xffffffff8102bddf <__cmpxchg>: cmpxchg %rdx,%ds:0x2e0f30(%rip) # 0xffffffff8130cd18 0xffffffff8102bde8 <pvclock_clocksource_read+122>: cmp %rdx,%rax 0xffffffff8102bdeb <pvclock_clocksource_read+125>: jne 0xffffffff8102bdda <pvclock_clocksource_read+108> 0xffffffff8102bded <pvclock_clocksource_read+127>: pop %rcx 0xffffffff8102bdee <pvclock_clocksource_read+128>: pop %rbx 0xffffffff8102bdef <pvclock_clocksource_read+129>: pop %rbp (gdb) shell objdump -axdt /local/lindi/vmlinux-2.6.32-5-amd64 | grep ffffffff8130cd18 ffffffff8130cd18 l O .rodata 0000000000000008 last_value ffffffff8102bdd3: 48 8b 05 3e 0f 2e 00 mov 0x2e0f3e(%rip),%rax # ffffffff8130cd18 <last_value> ffffffff8102bddf: f0 48 0f b1 15 30 0f lock cmpxchg %rdx,0x2e0f30(%rip) # ffffffff8130cd18 <last_value> What is interesting is that -15 does not contain such a variable: $ objdump -axdt 15/usr/lib/debug/boot/vmlinux-2.6.32-5-amd64|grep last_value ffffffff81671780 l O .bss 0000000000000001 last_value.26980 ffffffff811f5947: 0f b6 05 32 be 47 00 movzbl 0x47be32(%rip),%eax # ffffffff81671780 <last_value.26980> ffffffff811f5955: 88 15 25 be 47 00 mov %dl,0x47be25(%rip) # ffffffff81671780 <last_value.26980> $ objdump -axdt 17/usr/lib/debug/boot/vmlinux-2.6.32-5-amd64|grep last_value ffffffff8130cd18 l O .rodata 0000000000000008 last_value ffffffff81675bc0 l O .bss 0000000000000001 last_value.26980 ffffffff8102bdd3: 48 8b 05 3e 0f 2e 00 mov 0x2e0f3e(%rip),%rax # ffffffff8130cd18 <last_value> ffffffff8102bddf: f0 48 0f b1 15 30 0f lock cmpxchg %rdx,0x2e0f30(%rip) # ffffffff8130cd18 <last_value> ffffffff811f49f7: 0f b6 05 c2 11 48 00 movzbl 0x4811c2(%rip),%eax # ffffffff81675bc0 <last_value.26980> ffffffff811f4a05: 88 15 b5 11 48 00 mov %dl,0x4811b5(%rip) # ffffffff81675bc0 <last_value.26980> interdiff -z linux-2.6_2.6.32-15.diff.gz linux-2.6_2.6.32-17.diff.gz shows ++static atomic64_t last_value = ATOMIC64_INIT(0); ++ + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) + { + struct pvclock_shadow_time shadow; + unsigned version; + cycle_t ret, offset; ++ u64 last; + + do { + version = pvclock_get_time_values(&shadow, src); +@@ -123,6 +126,27 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) + barrier(); + } while (version != src->version); + ++ /* ++ * Assumption here is that last_value, a global accumulator, always goes ++ * forward. If we are less than that, we should not be much smaller. ++ * We assume there is an error marging we're inside, and then the correction ++ * does not sacrifice accuracy. ++ * ++ * For reads: global may have changed between test and return, ++ * but this means someone else updated poked the clock at a later time. ++ * We just need to make sure we are not seeing a backwards event. ++ * ++ * For updates: last_value = ret is not enough, since two vcpus could be ++ * updating at the same time, and one of them could be slightly behind, ++ * making the assumption that last_value always go forward fail to hold. ++ */ ++ last = atomic64_read(&last_value); ++ do { ++ if (ret < last) ++ return last; ++ last = atomic64_cmpxchg(&last_value, last, ret); ++ } while (unlikely(last != ret)); ++ + return ret; + } + -Timo -- To UNSUBSCRIBE, email to debian-kernel-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/84pqynh98f....@sauna.l.org