Hi, Roland, Andrew.

Something really sick is going on with the combination of utrace, RCU and ia64.

Below are a quickly reproducible oops happening on 8-way SMP,
my lame-o attempts to decode it, and the test program itself.

I checked 2.6.21-rc5 which was OK,
2.6.21-rc5 + linux-2.6.21-current-utrace.patch which was bad, and
2.6.21-rc5-mm1 which was also bad.

------------------------------------------------------------
Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b6b
expl_ptratt[4391]: Oops 11012296146944 [1]
Modules linked in: autofs4 hidp rfcomm l2cap bluetooth sunrpc 8021q bridge ipv6 
xt_length ipt_ttl xt_tcpmss iptable_mangle iptable_filter xt_multiport xt_limit 
ipt_tos ipt_REJECT ip_tables x_tables vfat fat button parport_pc lp parport sg 
e100 e1000 mii shpchp ide_cd cdrom dm_snapshot dm_zero dm_mirror dm_mod mptsas 
mptscsih mptbase scsi_transport_sas sd_mod scsi_mod ext3 jbd ehci_hcd ohci_hcd 
uhci_hcd
Pid: 4391, CPU 4, comm:          expl_ptratt
psr : 0000121008026018 ifs : 8000000000000389 ip  : [<a000000100094881>]    Not 
tainted
ip is at __rcu_process_callbacks+0x3e1/0x660
unat: 0000000000000000 pfs : 0000000000000389 rsc : 0000000000000003
rnat: 0000000000000000 bsps: 0000000000000000 pr  : 0000000000555965
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c0270033f
csd : 0000000000000000 ssd : 0000000000000000
b0  : a0000001000948a0 b6  : a0000001000d9ee0 b7  : a000000100010080
f6  : 1003e6b6b6b6b6b6b6b6b f7  : 1003e0000000003333334
f8  : 1003e0000000000001680 f9  : 1003e0000000000001680
f10 : 1003e0000000000000048 f11 : 1003e0000000000000050
r1  : a000000100d130a0 r2  : 0000000000000003 r3  : 0000000000000001
r8  : e000000468054028 r9  : e000000468054000 r10 : 0000000000000004
r11 : a00000010095a880 r12 : e000000467bcfc30 r13 : e000000467bc8000
r14 : e0000004681452e8 r15 : e000000107f77940 r16 : e000000107f77978
r17 : 0000000000000002 r18 : 000000000000000a r19 : e000000467bc8c34
r20 : 0000000000000020 r21 : e000000108bb2cac r22 : e000000108a8cdc8
r23 : 0000000000000002 r24 : e0000001000065d8 r25 : 6b6b6b6b6b6b6b6b
r26 : e000000108bb2cc0 r27 : 0000000000000002 r28 : 0000000000000002
r29 : 0000000000004000 r30 : 0000000000004000 r31 : 0000000000000011
Call Trace:
 [<a000000100012380>] show_stack+0x40/0xa0
                                sp=e000000467bcf7c0 bsp=e000000467bc9040
 [<a000000100012c80>] show_regs+0x840/0x880
                                sp=e000000467bcf990 bsp=e000000467bc8fe0
 [<a000000100034d80>] die+0x1c0/0x2a0
                                sp=e000000467bcf990 bsp=e000000467bc8f98
 [<a0000001005db5b0>] ia64_do_page_fault+0x8d0/0xa00
                                sp=e000000467bcf9b0 bsp=e000000467bc8f48
 [<a00000010000b300>] ia64_leave_kernel+0x0/0x280
                                sp=e000000467bcfa60 bsp=e000000467bc8f48
 [<a000000100094880>] __rcu_process_callbacks+0x3e0/0x660
                                sp=e000000467bcfc30 bsp=e000000467bc8f00
 [<a000000100094b40>] rcu_process_callbacks+0x40/0xa0
                                sp=e000000467bcfc30 bsp=e000000467bc8ee0
 [<a000000100078fd0>] tasklet_action+0x1d0/0x340
                                sp=e000000467bcfc30 bsp=e000000467bc8eb8
 [<a0000001000785b0>] __do_softirq+0xf0/0x240
                                sp=e000000467bcfc30 bsp=e000000467bc8e40
 [<a000000100078770>] do_softirq+0x70/0xc0
                                sp=e000000467bcfc30 bsp=e000000467bc8dd8
 [<a0000001000789c0>] irq_exit+0x80/0xa0
                                sp=e000000467bcfc30 bsp=e000000467bc8dc0
 [<a000000100010030>] ia64_handle_irq+0x250/0x280
                                sp=e000000467bcfc30 bsp=e000000467bc8d90
 [<a00000010000b300>] ia64_leave_kernel+0x0/0x280
                                sp=e000000467bcfc30 bsp=e000000467bc8d90
 [<a0000001005d6940>] _spin_unlock_irqrestore+0x40/0x80
                                sp=e000000467bcfe00 bsp=e000000467bc8d60
 [<a000000100064cf0>] wait_task_inactive+0x90/0x120
                                sp=e000000467bcfe00 bsp=e000000467bc8d38
 [<a0000001000e1a70>] sys_ptrace+0x890/0x1180
                                sp=e000000467bcfe10 bsp=e000000467bc8cb0
 [<a00000010000b0b0>] ia64_trace_syscall+0xd0/0x110
                                sp=e000000467bcfe30 bsp=e000000467bc8cb0
 [<a000000000010620>] __start_ivt_text+0xffffffff00010620/0x400
                                sp=e000000467bd0000 bsp=e000000467bc8cb0
Kernel panic - not syncing: Aiee, killing interrupt handler!
-------------------------------------------------------------
a0000001000944a0 <__rcu_process_callbacks>:
                ...
a000000100094830:       nop.m 0x0
a000000100094836:       cmp.eq p8,p9=0,r39;;            # while (list)
a00000010009483c:       nop.i 0x0

a000000100094840: (p09) mov r35=1
a000000100094846:       nop.m 0x0
a00000010009484c: (p09) br.cond.dptk.few a0000001000948e0 <__rcu_process_callbacks+0x440>

a000000100094850:       nop.m 0x0
a000000100094856:       nop.i 0x0
a00000010009485c:       br.few a000000100094ae0 <__rcu_process_callbacks+0x640>

a000000100094860:       nop.m 0x0
a000000100094866:       ld8 r34=[r14],8         # r34 = next = list->next
                                                # r14 = &list->func
a00000010009486c:       nop.i 0x0;;

a000000100094870:       lfetch [r34]            # prefetch(next)
a000000100094876:       nop.m 0x0
a00000010009487c:       nop.i 0x0;;

a000000100094880:       ld8 r25=[r14];;         # r25 = list->func
a000000100094886: ===>  ld8 r26=[r25],8         # something with something called .opd (?)
                                                # I couldn't understand
a00000010009488c:       nop.i 0x0;;

a000000100094890:       ld8 r1=[r25]
a000000100094896:       mov b6=r26
a00000010009489c:       br.call.sptk.many b0=b6;;       # list->func()



It looks like this place corresponds to the function call in rcu_do_batch():

        list = rdp->donelist;
        while (list) {
                next = list->next;
                prefetch(next);
                list->func(list);
                list = next;
                        ...

-------------------------------------------------------------
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/ptrace.h>
#include <signal.h>

/*
 * Thread body: replace the current process image with a fresh copy of this
 * same executable (/proc/self/exe).
 *
 * The argument list handed to execl() is empty -- its first element is
 * already the terminating null pointer -- so the new image is started with
 * argv[0] == NULL.  main() tests argv[0] and skips its fork() branch in that
 * case.
 *
 * arg is unused.
 *
 * Returns NULL, and only if execl() fails; on success execl() does not
 * return.
 */
static void *thread_func(void *arg)
{
        (void)arg;

        /*
         * The terminator of a variadic exec argument list must be a null
         * pointer of type char *, hence the explicit cast.
         */
        execl("/proc/self/exe", (char *)NULL);
        return NULL;
}

/*
 * Reproducer entry point.
 *
 * When started normally (argv[0] != NULL) the process forks:
 *   - the parent loops forever, attaching to and immediately detaching from
 *     the child with ptrace(), printing a progress line every 100000
 *     iterations;
 *   - the child falls through to the code below.
 *
 * The child -- and every image started by thread_func(), which is entered
 * with argv[0] == NULL and therefore skips the fork -- creates one thread
 * running thread_func() and then sleeps in pause() forever.
 *
 * Returns 1 if fork() fails; otherwise it does not return in practice.
 */
int main(int argc, const char *argv[])
{
        pthread_t thread;
        int pid, n;

        (void)argc;

        if (argv[0]) {
                pid = fork();
                if (pid < 0) {
                        /*
                         * Without this check a failed fork() (-1 is non-zero)
                         * would drop into the tracer loop and spin on pid -1.
                         */
                        perror("fork");
                        return 1;
                }

                if (pid)
                        /*
                         * Tracer side: ptrace() results are not checked; the
                         * loop just keeps issuing attach/detach pairs.
                         */
                        for (n = 1;; ++n) {
                                ptrace(PTRACE_ATTACH, pid, NULL, 0);
                                ptrace(PTRACE_DETACH, pid, NULL, 0);
                                if (!(n % 100000))
                                        printf("passed: %d\n", n);
                        }
        }

        /* Tracee side: spawn the thread that re-executes this program. */
        if (pthread_create(&thread, NULL, thread_func, NULL))
                perror("pthread_create");

        while (1)
                pause();
        return 1;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to