Re: Problem with ps and kthread: Reproducible crash -> corrupted by context switching ?

Sebastien Lorquet Fri, 12 May 2023 03:04:54 -0700

it seems not to crash with revision 13d823f307, but there is clearly aproblem with the command line here.

Is the context switch trying to save registers in a place where the argvare stored, because this is not a user task but a kernel thread?


I could not find where the SP of new tasks was initialized.

nsh> ps

PID GROUP PRI POLICY TYPE NPX STATE EVENT SIGMASKSTACK CPU COMMAND 0 0 0 FIFO Kthread N-- Ready 00000000004056 99.5% Idle Task 1 1 224 RR Kthread --- Waiting Semaphore 00000000004024 0.0% hpwork 0x24004f48 2 2 100 RR Kthread --- Waiting Semaphore 00000000004024 0.0% lpwork 0x24004f60 3 3 100 RR Task --- Running 00000000004032 0.5% nsh_main 4 4 100 RR Kthread --- Waiting Signal 00000000000960 0.0% watchdog (E F)▒{j▒C

Can someone help me, by trying to reproduce this, with a kthread_createin their board_init ?



Thanks,

Sebastien


Le 12/05/2023 à 11:08, Sebastien Lorquet a écrit :

Some more gdb stuff found that *something* is corrupting the contentsof argv, silently.
I have used an access watchpoint to catch accesses to argv
The first calls are related to the memset that clears the stack framewhen the task is created, and I see the correct values.
HOWEVER, at some point later, an IRQ 3 pops and shits in my stack

I would really like to know what is happening there.

Look at gdb catching this:

Old value = 939524096
New value = 0
0x08036690 in memset (s=0x38005350, c=0, n=20) at string/lib_memset.c:169
169       while (n-- > 0) *p++ = c;
(gdb)

171       return s;
(gdb)
172     }
(gdb) n
up_stack_frame (tcb=0x38004f28, frame_size=24) atcommon/arm_stackframe.c:9292 tcb->stack_base_ptr = (uint8_t *)tcb->stack_base_ptr +frame_size;
(gdb) n
93        tcb->adj_stack_size -= frame_size;
(gdb) print tcb
$2 = (struct tcb_s *) 0x38004f28
(gdb) print (struct tcb_s *) 0x38004f28
$3 = (struct tcb_s *) 0x38004f28
(gdb) print *(struct tcb_s *) 0x38004f28
$4 = {flink = 0x0, blink = 0x0, group = 0x38004fe0, pid = 4,sched_priority = 100 'd', init_priority = 100 'd', start = 0x802fc91<nxtask_start>, entry = {pthread = 0x806ecf1 <wdog_daemon>, main = 0x806ecf1<wdog_daemon>}, task_state = 4 '\004', flags = 4138, lockcount = 0,cpcount = 0, errcode = 0, timeslice = 200, waitdog = {next = 0x0, arg = 0, func =0x0, lag = 0}, adj_stack_size = 984, stack_alloc_ptr = 0x38005328, stack_base_ptr = 0x38005368, waitobj = 0x0, sigprocmask = {_elem ={0, 0}}, sigwaitmask = {_elem = {0, 0}}, sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0, tail = 0x0}, sigunbinfo ={si_signo = 0 '\000', si_code = 0 '\000', si_errno = 0 '\000',si_value = { sival_int = 0, sival_ptr = 0x0}, si_user = 0x0}, mhead = 0x0,ticks = 0, xcp = {sigdeliver = 0x0, saved_regs = 0x0, regs = 0x38005654},
  name = "watchdog\000\000\000\000\000\000\000\000"}
(gdb) print *((struct tcb_s *) 0x38004f28)->group
$5 = {flink = 0x0, tg_pid = 0, tg_ppid = 0, tg_flags = 0 '\000',tg_nmembers = 0 '\000', tg_mxmembers = 4 '\004', tg_members = 0x38005078, tg_nwaiters = 0 '\000', tg_waitflags = 0 '\000', tg_exitsem ={semcount = 0, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}}, tg_statloc = 0x0, tg_joinlock = {sem = {semcount = 1, flags = 0'\000', waitlist = {head = 0x0, tail = 0x0}}, holder = -1},tg_joinhead = 0x0, tg_jointail = 0x0, tg_info = 0x38005090, tg_sigactionq = {head =0x0, tail = 0x0}, tg_sigpendingq = {head = 0x0, tail = 0x0}, tg_envp =0x0, tg_envc = 0, itimer = 0x0, tg_filelist = {fl_lock = {sem = {semcount= 1, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}}, holder =-1}, fl_rows = 1 '\001', fl_files = 0x38005250}, tg_mm_map = {mm_map_sq= {head = 0x0, tail = 0x0}, mm_map_mutex = {mutex = {sem = {semcount = 0, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}},holder = 0}, count = 0}}}
(gdb) print *((struct tcb_s *) 0x38004f28)->group->tg_info->argv
$6 = 0x0
(gdb) print ((struct tcb_s *) 0x38004f28)->group->tg_info->argv[0]
$7 = 0x0
(gdb) n
97        return ret;
(gdb) n
98      }
(gdb) n
nxtask_setup_stackargs (tcb=0x38004f28, name=0x8080288 "watchdog",argv=0x0) at task/task_setup.c:578
578       DEBUGASSERT(stackargv != NULL);
(gdb) n
579       if (stackargv == NULL)
(gdb)
588       str = (FAR char *)stackargv + argvlen;
(gdb)
594       stackargv[0] = str;
(gdb)

Hardware access (read/write) watchpoint 2: *0x38005350

Old value = 0
New value = 939545432
0x0802fb12 in nxtask_setup_stackargs (tcb=0x38004f28, name=0x8080288"watchdog", argv=0x0) at task/task_setup.c:595
595       nbytes       = strlen(name) + 1;
(gdb)
596       strlcpy(str, name, strtablen);
(gdb)
597       str         += nbytes;
(gdb)
598       strtablen   -= nbytes;
(gdb)
602       for (i = 0; i < argc; i++)
(gdb)
621       stackargv[argc + 1] = NULL;
(gdb)
622       tcb->cmn.group->tg_info->argv = stackargv;
(gdb)
624       return OK;
(gdb)
625     }
(gdb) print ((struct tcb_s *) 0x38004f28)->group->tg_info->argv[0]
$8 = 0x38005358 "watchdog"
(gdb) print ((struct tcb_s *) 0x38004f28)->group->tg_info->argv[1]
$9 = 0x0
(gdb) info watch
Num     Type           Disp Enb Address    What
2       acc watchpoint keep y              *0x38005350
        breakpoint already hit 5 times
(gdb) c
Continuing.

Hardware access (read/write) watchpoint 2: *0x38005350

Old value = 939545432
New value = 939545756
arm_svcall (irq=3, context=0x3800549c, arg=0x0) atarmv7-m/arm_svcall.c:130
130       DEBUGASSERT(regs && regs == CURRENT_REGS);
(gdb) print ((struct tcb_s *) 0x38004f28)->group->tg_info->argv[0]
$10 = 0x3800549c "pU"
(gdb) print ((struct tcb_s *) 0x38004f28)->group->tg_info->argv[1]
$11 = 0x3 ""

BAM! Pu and pointer 3. My memory is corrupted.

What is this exception?

(gdb) bt
#0 arm_svcall (irq=3, context=0x3800549c, arg=0x0) atarmv7-m/arm_svcall.c:130#1 0x08023582 in arm_hardfault (irq=3, context=0x3800549c, arg=0x0)at armv7-m/arm_hardfault.c:120#2 0x0802b2e4 in irq_dispatch (irq=3, context=0x3800549c) atirq/irq_dispatch.c:169
#3  0x08023504 in arm_doirq (irq=3, regs=0x0) at armv7-m/arm_doirq.c:70
#4  0x08022afa in exception_common () at armv7-m/arm_exception.S:224

Let's see

(gdb) n
131       cmd = regs[REG_R0];
(gdb) n
160       switch (cmd)
(gdb) n
205               DEBUGASSERT(regs[REG_R1] != 0 && regs[REG_R2] != 0);
(gdb) print cmd
$12 = 2
(gdb) n
206               *(uint32_t **)regs[REG_R1] = regs;
(gdb) n
207               CURRENT_REGS = (uint32_t *)regs[REG_R2];
(gdb) n
209             break;
(gdb) n
472       return OK;
(gdb) n
473     }

That's a context switch ???

Why is a context switch corrupting the argv contents of this task?

Sebastien


Le 12/05/2023 à 10:18, Sebastien Lorquet a écrit :
Well it's not the stack, same crash with the stack doubled from 4096to 8192, BFAR still contains the suspicious 3 from the argv array
nsh> ps
PID GROUP PRI POLICY TYPE NPX STATE EVENT SIGMASKSTACK CPU COMMAND 0 0 0 FIFO Kthread N-- Ready 0000000000000000 004056100.0% Idle Task 1 1 224 RR Kthread --- Waiting Semaphore0000000000000000 004024 0.0% hpwork 0x240050c8 2 2 100 RR Kthread --- Waiting Semaphore0000000000000000 004024 0.0% lpwork 0x240050e0 3 3 100 RR Task --- Running 0000000000000000008128 0.0% nsh_main 4 4 100 RR Kthread --- Waiting Signal 00000000 IRQ: 3 regs: 0x380047ecarm_busfault: BASEPRI: 000000f0 PRIMASK: 00000000 IPSR: 00000003CONTROL: 00000000arm_busfault: CFSR: 00008200 HFSR: 40000000 DFSR: 00000000 BFAR:00000003 AFSR: 00000000
arm_busfault: Bus Fault Reason:
arm_busfault:   Precise data bus error
_assert: Current Version: NuttX bca 12.0.0-RC1 2eac660ff6-dirty May12 2023 09:23:24 arm_assert: Assertion failed panic: at file: armv7-m/arm_busfault.c:106task: nsh_main 0x8039c41
up_dump_register: R0: 00000003 R1: ffffffff R2: 38004a1c  R3: 00000003
up_dump_register: R4: 08039e89 R5: 3800671e R6: 38006734  FP: 380048c0
up_dump_register: R8: 00000000 SB: 00000000 SL: 00000000 R11: 00000000
up_dump_register: IP: a0000000 SP: 380048c0 LR: 08035537  PC: 080367a8
up_dump_register: xPSR: a1000000 PRIMASK: 00000000 CONTROL: 00000000
up_dump_register: EXC_RETURN: ffffffe9
dump_stack: User Stack:
dump_stack: sp:     0x380048c0
dump_stack:   base: 0x38002ee8
dump_stack:   size: 00008128

<lots of stack dump>

Also the crash dump crashes itself while listing the tasks:
show_tasks: PID GROUP PRI POLICY TYPE NPX STATE EVENTSIGMASK STACKBASE STACKSIZE CPU COMMANDdump_task: 0 0 0 FIFO Kthread N-- Ready 0000000000000000 0x24007478 4056 99.7% Idle Taskdump_task: 1 1 224 RR Kthread --- Waiting Semaphore 0000000000000000 0x380006c0 4024 0.0% hpwork 0x240050c8dump_task: 2 2 100 RR Kthread --- Waiting Semaphore 0000000000000000 0x38001ac8 4024 0.0% lpwork 0x240050e0dump_task: 3 3 100 RR Task --- Running 0000000000000000 0x38002ee8 8128 0.2% nsh_main
I have 10 of them. the crash dump is not able to list the watchdogtask, pid 4.
Look at what I have in proc before the crash:

nsh> ls -l /proc
/proc:
 dr--r--r--       0 0/
 dr--r--r--       0 1/
 dr--r--r--       0 2/
 dr--r--r--       0 3/
 dr--r--r--       0 4/
 dr--r--r--       0 5/
 dr--r--r--       0 6/
 dr--r--r--       0 7/
 dr--r--r--       0 8/
 dr--r--r--       0 9/
 -r--r--r--       0 cpuload
 dr--r--r--       0 fs/
 -r--r--r--       0 iobinfo
 -r--r--r--       0 memdump
 -r--r--r--       0 meminfo
 -r--r--r--       0 mempool
 dr--r--r--       0 net/
 dr--r--r--       0 self/
 -r--r--r--       0 uptime
 -r--r--r--       0 version
nsh> cat /proc/3/cmdline
nsh_main
nsh> cat /proc/5/cmdline
msgbus
nsh> cat /proc/4/cmdline
arm_busfault: PANIC!!! Bus Fault:
arm_busfault:   IRQ: 3 regs: 0x3800489c
arm_busfault: BASEPRI: 000000f0 PRIMASK: 00000000 IPSR: 00000003CONTROL: 00000000arm_busfault: CFSR: 00008200 HFSR: 40000000 DFSR: 00000000 BFAR:00000003 AFSR: 00000000
arm_busfault: Bus Fault Reason:
arm_busfault:   Precise data bus error
_assert: Current Version: NuttX bca 12.0.0-RC1 2eac660ff6-dirty May12 2023 09:23:24 arm_assert: Assertion failed panic: at file: armv7-m/arm_busfault.c:106task: nsh_main 0x8039c41
up_dump_register: R0: 00000003 R1: ffffffff R2: 38004acc  R3: 00000003
up_dump_register: R4: 380068f4 R5: 38002c44 R6: 00000000  FP: 38004970
up_dump_register: R8: 00000000 SB: 00000000 SL: 00000000 R11: 00000000
up_dump_register: IP: a0000000 SP: 38004970 LR: 08035537  PC: 080367a8
up_dump_register: xPSR: a1000000 PRIMASK: 00000000 CONTROL: 00000000
up_dump_register: EXC_RETURN: ffffffe9
dump_stack: User Stack:
dump_stack: sp:     0x38004970
dump_stack:   base: 0x38002ee8
dump_stack:   size: 00008128

same crash.
Step by step debugging shows that nxtask_setup_stackargs installs thecorrect information.
So there is some unexpected corruption between creation and psexecution.
Everything works if I just dont create this task, so I dont believemy own programs can do that.
I'll try to have a closer look at who touches this memory.

How can I enumerate the TCBs via gdb at any point in time?

Sebastien

Le 11/05/2023 à 21:17, Sebastien Lorquet a écrit :
The stack is a good candidate, I see that I reported

stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368

but at the crash we have :

R2: 38003a1c

SP: 380038c0

I will try that tomorrow, thanks for the idea.


Sebastien

On 5/11/23 19:39, raiden00pl wrote:
Have you tried to increase the stack size for NSH ? Stack overflowin NSH
can cause some really weird hard faults, not that easy to diagnose.


czw., 11 maj 2023 o 18:34 Sebastien Lorquet <sebast...@lorquet.fr>
napisał(a):
Hello,

I have a stm32h7 board, based on the stm32h743zi2 nucleo.

I have activated the watchdog.

The method to do the kthread was copied from this :
https://github.com/apache/nuttx/blob/master/boards/arm/stm32/photon/src/stm32_wdt.c#LL146C7-L146C7
The watchdog works, the system is stable
But now, the nsh 'ps' command ends up with a crash when trying tolist
the kthread that resets the watchdog.


The problem has appeared between current trunk and revision
13d823f30710e6fabd3d6429a03bc37e1086c9e7


Here is the GDB session, a bit after cmd_ps is entered:
(pid 4 is the watchog maintenance kthread created like in the fileabove)
open (path=0x3800c770 "/proc/4/cmdline", oflags=1) atvfs/fs_open.c:447
447       if (fd < 0)
(gdb) n
453       leave_cancellation_point();
(gdb)
454       return fd;
(gdb)
455     }
(gdb)
nsh_readfile (vtbl=0x38005688, cmd=0x807ab7c "ps",filepath=0x3800c770
"/proc/4/cmdline", buffer=0x380056b0 "  0.0%", buflen=512)
      at nsh_fsutils.c:219
219       if (fd < 0)
(gdb) print fd
$1 = 4
(gdb) n
229       ntotal    = 0;          /* No bytes read yet */
(gdb)
230 *buffer = '\0'; /* NUL terminate the emptybuffer */
(gdb)
231       bufptr    = buffer;     /* Working pointer */
(gdb)
232       remaining = buflen - 1; /* Reserve one byte for a NUL
terminator */
(gdb)
233       ret       = ERROR;      /* Assume failure */
(gdb)
237           nread = read(fd, bufptr, remaining);
(gdb) s
read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:166
166       enter_cancellation_point();
(gdb) n
170       ret = nx_read(fd, buf, nbytes);
(gdb) s
nx_read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:132
132       ret = (ssize_t)fs_getfilep(fd, &filep);
(gdb) n
133       if (ret < 0)
(gdb)
140       return file_read(filep, buf, nbytes);
(gdb) s
file_read (filep=0x38002e40, buf=0x380056b0, nbytes=511) at
vfs/fs_read.c:67
67        int ret = -EBADF;
(gdb) n
69        DEBUGASSERT(filep);
(gdb)
70        inode = filep->f_inode;
(gdb)
74        if ((filep->f_oflags & O_RDOK) == 0)
(gdb)
85 else if (inode != NULL && inode->u.i_ops &&inode->u.i_ops->read)
(gdb)
92            ret = (int)inode->u.i_ops->read(filep,
(gdb) s
procfs_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at
procfs/fs_procfs.c:445
445       handler = (FAR struct procfs_file_s *)filep->f_priv;
(gdb) n
446       DEBUGASSERT(handler);
(gdb)
450 return handler->procfsentry->ops->read(filep, buffer,buflen);
(gdb) s
proc_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at
procfs/fs_procfsproc.c:1552
1552      procfile = (FAR struct proc_file_s *)filep->f_priv;
(gdb) n
1553      DEBUGASSERT(procfile != NULL);
(gdb)
1557      tcb = nxsched_get_tcb(procfile->pid);
(gdb)
1558      if (tcb == NULL)
(gdb) print tcb
$2 = (struct tcb_s *) 0x38003f28
(gdb) print *tcb
$3 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4,
sched_priority = 100 'd', init_priority = 100 'd',
    start = 0x802fc91 <nxtask_start>, entry = {pthread = 0x806ecf1
<wdog_daemon>, main = 0x806ecf1 <wdog_daemon>}, task_state = 6'\006', flags = 4138, lockcount = 0, cpcount = 0, errcode = 0,timeslice =
200, waitdog = {next = 0x38005a40, arg = 939540264,
func = 0x802ecdd <nxsig_timeout>, lag = 316}, adj_stack_size= 960,stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj= 0x0,
    sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}},
sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0,
      tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2
'\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr =0x0}, si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver =0x0,
saved_regs = 0x0, regs = 0x3800449c},
    name = "watchdog\000\000\000\000\000\000\000\000"}
(gdb) n
1566      switch (procfile->node->node)
(gdb) n
1573          ret = proc_cmdline(procfile, tcb, buffer, buflen,
filep->f_pos);
(gdb) s
proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28,buffer=0x380056b0 "",
buflen=511, offset=0) at procfs/fs_procfsproc.c:664
664       remaining = buflen;
(gdb) n
665       totalsize = 0;
(gdb)
670       name       = tcb->name;
(gdb)
674       linesize   = strlen(name);
(gdb)
675       memcpy(procfile->line, name, linesize);
(gdb) print linesize
$4 = 8
(gdb) n
676 copysize = procfs_memcpy(procfile->line, linesize,buffer,
remaining,
(gdb) n
679       totalsize += copysize;
(gdb) print copysize
$5 = 8
(gdb) n
680       buffer    += copysize;
(gdb) n
681       remaining -= copysize;
(gdb) n
683       if (totalsize >= buflen)
(gdb) print remaining
$6 = 503
(gdb) n
690       linesize   = group_argvstr(tcb, procfile->line, remaining);
(gdb) s
group_argvstr (tcb=0x38003f28, args=0x3800caac "watchdog",size=503) at
group/group_argvstr.c:61
61        size_t n = 0;
(gdb) n
68        if (!tcb || !tcb->group || !tcb->group->tg_info)
(gdb)
84 if ((tcb->flags & TCB_FLAG_TTYPE_MASK) ==TCB_FLAG_TTYPE_PTHREAD)
(gdb)
93            FAR char **argv = tcb->group->tg_info->argv + 1;
(gdb)
95            while (*argv != NULL && n < size)
(gdb)
97                n += snprintf(args + n, size - n, " %s", *argv++);
(gdb) n

*CRASH*

When I just run, the stack dump shows a crash at

Fault:   IRQ: 3 regs: 0x380037ec
arm_busfault:   BASEPRI: 000000f0 PRIMASK: 00000000 IPSR: 00000003
CONTROL: 00000000
arm_busfault:   CFSR: 00008200 HFSR: 40000000 DFSR: 00000000 BFAR:
00000003 AFSR: 00000000
arm_busfault: Bus Fault Reason:
arm_busfault:   Precise data bus error
_assert: Current Version: NuttX bca 12.0.0-RC1 2eac660ff6-dirtyMay 11
2023 15:27:06 arm
_assert: Assertion failed panic: at file: armv7-m/arm_busfault.c:106
task: nsh_main 0x8039c41
up_dump_register: R0: 00000003 R1: ffffffff R2: 38003a1c R3: 00000003
up_dump_register: R4: 08039e89 R5: 3800571e R6: 38005734 FP: 380038c0
up_dump_register: R8: 00000000 SB: 00000000 SL: 00000000 R11:00000000
up_dump_register: IP: a0000000 SP: 380038c0 LR: 08035537 PC: 080367a8
up_dump_register: xPSR: a1000000 PRIMASK: 00000000 CONTROL: 00000000

PC=nuttx/libs/libc/string/lib_strnlen.c:37 (discriminator 3)
LR=nuttx/libs/libc/stdio/lib_libvsprintf.c:940 (discriminator 4)

This is trying to measure the length of the command line

Running again with a breakpoint in proc_cmdline:

Breakpoint 1, proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28,
buffer=0x380056b0 "", buflen=511, offset=0) atprocfs/fs_procfsproc.c:664
664       remaining = buflen;
(gdb) print *tcb
$4 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4,
sched_priority = 100 'd', init_priority = 100 'd',
    start = 0x802fc91 <nxtask_start>, entry = {pthread = 0x806ecf1
<wdog_daemon>, main = 0x806ecf1 <wdog_daemon>}, task_state = 6'\006', flags = 4138, lockcount = 0, cpcount = 0, errcode = 0,timeslice =
200, waitdog = {next = 0x0, arg = 939540264,
func = 0x802ecdd <nxsig_timeout>, lag = 419}, adj_stack_size= 960,stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj= 0x0,
    sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}},
sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0,
      tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2
'\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr =0x0}, si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver =0x0,
saved_regs = 0x0, regs = 0x3800449c},
    name = "watchdog\000\000\000\000\000\000\000\000"}

(gdb) print *tcb->group
$5 = {flink = 0x38002b40, tg_pid = 4, tg_ppid = 0, tg_flags = 0'\000',tg_nmembers = 1 '\001', tg_mxmembers = 4 '\004', tg_members =0x38004078,
    tg_nwaiters = 0 '\000', tg_waitflags = 0 '\000', tg_exitsem =
{semcount = 0, flags = 0 '\000', waitlist = {head = 0x0, tail =0x0}},
    tg_statloc = 0x0, tg_joinlock = {sem = {semcount = 1, flags = 0
'\000', waitlist = {head = 0x0, tail = 0x0}}, holder = -1},tg_joinhead
= 0x0,
tg_jointail = 0x0, tg_info = 0x38004090, tg_sigactionq = {head= 0x0,tail = 0x0}, tg_sigpendingq = {head = 0x0, tail = 0x0}, tg_envp =0x0, tg_envc = 0, itimer = 0x0, tg_filelist = {fl_lock = {sem ={semcount= 1, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}},holder = -1}, fl_rows = 1 '\001', fl_files = 0x38004250}, tg_mm_map ={mm_map_sq= {head = 0x0, tail = 0x0}, mm_map_mutex = {mutex = {sem ={semcount = 1,
            flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}},
holder = -1}, count = 0}}}

(gdb) print *tcb->group->tg_info
$6 = {ta_lock = {sem = {semcount = 1, flags = 0 '\000', waitlist ={head
= 0x0, tail = 0x0}}, holder = -1}, argv = 0x38004350,
ta_tlsset = 0 '\000', ta_tlsdtor = {0x0, 0x0, 0x0, 0x0},ta_getopt =
{go_optarg = 0x0, go_opterr = 0, go_optind = 0, go_optopt = 0,
      go_optptr = 0x0, go_binitialized = false}, ta_umask = 0,
ta_streamlist = {sl_lock = {sem = {semcount = 1, flags = 0 '\000',
waitlist = {
head = 0x0, tail = 0x0}}, holder = -1}, sl_std ={{fs_next =
0x0, fs_lock = {mutex = {sem = {semcount = 1, flags = 0 '\000',
waitlist = {head = 0x0, tail = 0x0}}, holder =-1}, count
= 0}, fs_fd = 0, fs_bufstart = 0x38004110 "",
          fs_bufend = 0x38004150 "\001", fs_bufpos = 0x38004110 "",
fs_bufread = 0x38004110 "", fs_buffer = '\000' <repeats 63 times>,
          fs_oflags = 1, fs_flags = 12 '\f', fs_nungotten = 0 '\000',
fs_ungotten = "\000"}, {fs_next = 0x0, fs_lock = {mutex = {sem = {
semcount = 1, flags = 0 '\000', waitlist = {head =0x0,
tail = 0x0}}, holder = -1}, count = 0}, fs_fd = 1,
          fs_bufstart = 0x38004184 "", fs_bufend = 0x380041c4 "\006",
fs_bufpos = 0x38004184 "", fs_bufread = 0x38004184 "",
fs_buffer = '\000' <repeats 63 times>, fs_oflags = 6,fs_flags= 12 '\f', fs_nungotten = 0 '\000', fs_ungotten = "\000"},{fs_next = 0x0,
          fs_lock = {mutex = {sem = {semcount = 1, flags = 0 '\000',
waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, count = 0},fs_fd = 2,
          fs_bufstart = 0x380041f8 "", fs_bufend = 0x38004238 "\006",
fs_bufpos = 0x380041f8 "", fs_bufread = 0x380041f8 "",
fs_buffer = '\000' <repeats 63 times>, fs_oflags = 6,fs_flags= 12 '\f', fs_nungotten = 0 '\000', fs_ungotten = "\000"}},sl_head = 0x0,
      sl_tail = 0x0}}
We see that argv = 0x38004350, which is in the stack:stack_alloc_ptr =
0x38004328, stack_base_ptr = 0x38004368

(gdb) print (uint32_t[10])*tcb->group->tg_info->argv
$8 = {939541660, 3, 2, 939541660, 939541352, 134362499, 0, 0,939541660, 3}
(gdb) print (char*[10])*tcb->group->tg_info->argv
$9 = {0x3800449c "pE", 0x3 "", 0x2 "", 0x3800449c "pE", 0x38004368"",
0x8023583 <arm_hardfault+78> "F+\340{j\003\360\200C", 0x0, 0x0,
    0x3800449c "pE", 0x3 ""}
This is not good! Too many non null values in that array, some ofthem
are not pointers! The value 3 is found in the BFAR of the crash dump.

kthread_create was called with argv = NULL

This calls kthread_create_with_stack with argv = NULL

This calls nxthread_create with argv = NULL

This calls nxtask_init with argv = NULL, envp=NULL

goto sched/task/task_init.c

This goes to nxtask_setup_arguments with argv=NULL

goto sched/task/task_setup.c

This calls nxtask_setup_stackargs

Looks like this function does the right thing.
I should get argc=0, allocation of two pointers, one for the taskname
and one for the final null arg.
Then what is happening? Why do I see invalid values in the argvarray?
Can someone help me? Can someone try to reproduce this?
I have built this image with a make distclean, apply config, make,so I
dont expect dangling files.



Thanks,

Sebastien

Re: Problem with ps and kthread: Reproducible crash -> corrupted by context switching ?

Reply via email to