Have you tried increasing the stack size for NSH? A stack overflow in NSH can cause some really weird hard faults that are not easy to diagnose.
czw., 11 maj 2023 o 18:34 Sebastien Lorquet <sebast...@lorquet.fr> napisał(a): > Hello, > > I have a stm32h7 board, based on the stm32h743zi2 nucleo. > > I have activated the watchdog. > > The method to do the kthread was copied from this : > > > https://github.com/apache/nuttx/blob/master/boards/arm/stm32/photon/src/stm32_wdt.c#LL146C7-L146C7 > > The watchdog works, the system is stable > > > But now, the nsh 'ps' command ends up with a crash when trying to list > the kthread that resets the watchdog. > > > The problem has appeared between current trunk and revision > 13d823f30710e6fabd3d6429a03bc37e1086c9e7 > > > Here is the GDB session, a bit after cmd_ps is entered: > > (pid 4 is the watchdog maintenance kthread created like in the file above) > > > open (path=0x3800c770 "/proc/4/cmdline", oflags=1) at vfs/fs_open.c:447 > 447 if (fd < 0) > (gdb) n > 453 leave_cancellation_point(); > (gdb) > 454 return fd; > (gdb) > 455 } > (gdb) > nsh_readfile (vtbl=0x38005688, cmd=0x807ab7c "ps", filepath=0x3800c770 > "/proc/4/cmdline", buffer=0x380056b0 " 0.0%", buflen=512) > at nsh_fsutils.c:219 > 219 if (fd < 0) > (gdb) print fd > $1 = 4 > (gdb) n > 229 ntotal = 0; /* No bytes read yet */ > (gdb) > 230 *buffer = '\0'; /* NUL terminate the empty buffer */ > (gdb) > 231 bufptr = buffer; /* Working pointer */ > (gdb) > 232 remaining = buflen - 1; /* Reserve one byte for a NUL > terminator */ > (gdb) > 233 ret = ERROR; /* Assume failure */ > (gdb) > 237 nread = read(fd, bufptr, remaining); > (gdb) s > read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:166 > 166 enter_cancellation_point(); > (gdb) n > 170 ret = nx_read(fd, buf, nbytes); > (gdb) s > nx_read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:132 > 132 ret = (ssize_t)fs_getfilep(fd, &filep); > (gdb) n > 133 if (ret < 0) > (gdb) > 140 return file_read(filep, buf, nbytes); > (gdb) s > file_read (filep=0x38002e40, buf=0x380056b0, nbytes=511) at > vfs/fs_read.c:67 > 67 int ret = -EBADF; > (gdb) n > 69 
DEBUGASSERT(filep); > (gdb) > 70 inode = filep->f_inode; > (gdb) > 74 if ((filep->f_oflags & O_RDOK) == 0) > (gdb) > 85 else if (inode != NULL && inode->u.i_ops && inode->u.i_ops->read) > (gdb) > 92 ret = (int)inode->u.i_ops->read(filep, > (gdb) s > procfs_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at > procfs/fs_procfs.c:445 > 445 handler = (FAR struct procfs_file_s *)filep->f_priv; > (gdb) n > 446 DEBUGASSERT(handler); > (gdb) > 450 return handler->procfsentry->ops->read(filep, buffer, buflen); > (gdb) s > proc_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at > procfs/fs_procfsproc.c:1552 > 1552 procfile = (FAR struct proc_file_s *)filep->f_priv; > (gdb) n > 1553 DEBUGASSERT(procfile != NULL); > (gdb) > 1557 tcb = nxsched_get_tcb(procfile->pid); > (gdb) > 1558 if (tcb == NULL) > (gdb) print tcb > $2 = (struct tcb_s *) 0x38003f28 > (gdb) print *tcb > $3 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4, > sched_priority = 100 'd', init_priority = 100 'd', > start = 0x802fc91 <nxtask_start>, entry = {pthread = 0x806ecf1 > <wdog_daemon>, main = 0x806ecf1 <wdog_daemon>}, task_state = 6 '\006', > flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice = > 200, waitdog = {next = 0x38005a40, arg = 939540264, > func = 0x802ecdd <nxsig_timeout>, lag = 316}, adj_stack_size = 960, > stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0, > sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}}, > sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0, > tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2 > '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0}, > si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0, > saved_regs = 0x0, regs = 0x3800449c}, > name = "watchdog\000\000\000\000\000\000\000\000"} > (gdb) n > 1566 switch (procfile->node->node) > (gdb) n > 1573 ret = proc_cmdline(procfile, tcb, buffer, buflen, > filep->f_pos); > (gdb) s > 
proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28, buffer=0x380056b0 "", > buflen=511, offset=0) at procfs/fs_procfsproc.c:664 > 664 remaining = buflen; > (gdb) n > 665 totalsize = 0; > (gdb) > 670 name = tcb->name; > (gdb) > 674 linesize = strlen(name); > (gdb) > 675 memcpy(procfile->line, name, linesize); > (gdb) print linesize > $4 = 8 > (gdb) n > 676 copysize = procfs_memcpy(procfile->line, linesize, buffer, > remaining, > (gdb) n > 679 totalsize += copysize; > (gdb) print copysize > $5 = 8 > (gdb) n > 680 buffer += copysize; > (gdb) n > 681 remaining -= copysize; > (gdb) n > 683 if (totalsize >= buflen) > (gdb) print remaining > $6 = 503 > (gdb) n > 690 linesize = group_argvstr(tcb, procfile->line, remaining); > (gdb) s > group_argvstr (tcb=0x38003f28, args=0x3800caac "watchdog", size=503) at > group/group_argvstr.c:61 > 61 size_t n = 0; > (gdb) n > 68 if (!tcb || !tcb->group || !tcb->group->tg_info) > (gdb) > 84 if ((tcb->flags & TCB_FLAG_TTYPE_MASK) == TCB_FLAG_TTYPE_PTHREAD) > (gdb) > 93 FAR char **argv = tcb->group->tg_info->argv + 1; > (gdb) > 95 while (*argv != NULL && n < size) > (gdb) > 97 n += snprintf(args + n, size - n, " %s", *argv++); > (gdb) n > > *CRASH* > > When I just run, the stack dump shows a crash at > > Fault: IRQ: 3 regs: 0x380037ec > arm_busfault: BASEPRI: 000000f0 PRIMASK: 00000000 IPSR: 00000003 > CONTROL: 00000000 > arm_busfault: CFSR: 00008200 HFSR: 40000000 DFSR: 00000000 BFAR: > 00000003 AFSR: 00000000 > arm_busfault: Bus Fault Reason: > arm_busfault: Precise data bus error > _assert: Current Version: NuttX bca 12.0.0-RC1 2eac660ff6-dirty May 11 > 2023 15:27:06 arm > _assert: Assertion failed panic: at file: armv7-m/arm_busfault.c:106 > task: nsh_main 0x8039c41 > up_dump_register: R0: 00000003 R1: ffffffff R2: 38003a1c R3: 00000003 > up_dump_register: R4: 08039e89 R5: 3800571e R6: 38005734 FP: 380038c0 > up_dump_register: R8: 00000000 SB: 00000000 SL: 00000000 R11: 00000000 > up_dump_register: IP: a0000000 SP: 380038c0 LR: 
08035537 PC: 080367a8 > up_dump_register: xPSR: a1000000 PRIMASK: 00000000 CONTROL: 00000000 > > PC=nuttx/libs/libc/string/lib_strnlen.c:37 (discriminator 3) > LR=nuttx/libs/libc/stdio/lib_libvsprintf.c:940 (discriminator 4) > > This is trying to measure the length of the command line > > Running again with a breakpoint in proc_cmdline: > > Breakpoint 1, proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28, > buffer=0x380056b0 "", buflen=511, offset=0) at procfs/fs_procfsproc.c:664 > 664 remaining = buflen; > (gdb) print *tcb > $4 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4, > sched_priority = 100 'd', init_priority = 100 'd', > start = 0x802fc91 <nxtask_start>, entry = {pthread = 0x806ecf1 > <wdog_daemon>, main = 0x806ecf1 <wdog_daemon>}, task_state = 6 '\006', > flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice = > 200, waitdog = {next = 0x0, arg = 939540264, > func = 0x802ecdd <nxsig_timeout>, lag = 419}, adj_stack_size = 960, > stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0, > sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}}, > sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0, > tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2 > '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0}, > si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0, > saved_regs = 0x0, regs = 0x3800449c}, > name = "watchdog\000\000\000\000\000\000\000\000"} > > (gdb) print *tcb->group > $5 = {flink = 0x38002b40, tg_pid = 4, tg_ppid = 0, tg_flags = 0 '\000', > tg_nmembers = 1 '\001', tg_mxmembers = 4 '\004', tg_members = 0x38004078, > tg_nwaiters = 0 '\000', tg_waitflags = 0 '\000', tg_exitsem = > {semcount = 0, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}}, > tg_statloc = 0x0, tg_joinlock = {sem = {semcount = 1, flags = 0 > '\000', waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, tg_joinhead > = 0x0, > tg_jointail = 0x0, tg_info = 0x38004090, 
tg_sigactionq = {head = 0x0, > tail = 0x0}, tg_sigpendingq = {head = 0x0, tail = 0x0}, tg_envp = 0x0, > tg_envc = 0, itimer = 0x0, tg_filelist = {fl_lock = {sem = {semcount > = 1, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, > fl_rows = 1 '\001', fl_files = 0x38004250}, tg_mm_map = {mm_map_sq > = {head = 0x0, tail = 0x0}, mm_map_mutex = {mutex = {sem = {semcount = 1, > flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}}, > holder = -1}, count = 0}}} > > (gdb) print *tcb->group->tg_info > $6 = {ta_lock = {sem = {semcount = 1, flags = 0 '\000', waitlist = {head > = 0x0, tail = 0x0}}, holder = -1}, argv = 0x38004350, > ta_tlsset = 0 '\000', ta_tlsdtor = {0x0, 0x0, 0x0, 0x0}, ta_getopt = > {go_optarg = 0x0, go_opterr = 0, go_optind = 0, go_optopt = 0, > go_optptr = 0x0, go_binitialized = false}, ta_umask = 0, > ta_streamlist = {sl_lock = {sem = {semcount = 1, flags = 0 '\000', > waitlist = { > head = 0x0, tail = 0x0}}, holder = -1}, sl_std = {{fs_next = > 0x0, fs_lock = {mutex = {sem = {semcount = 1, flags = 0 '\000', > waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, count > = 0}, fs_fd = 0, fs_bufstart = 0x38004110 "", > fs_bufend = 0x38004150 "\001", fs_bufpos = 0x38004110 "", > fs_bufread = 0x38004110 "", fs_buffer = '\000' <repeats 63 times>, > fs_oflags = 1, fs_flags = 12 '\f', fs_nungotten = 0 '\000', > fs_ungotten = "\000"}, {fs_next = 0x0, fs_lock = {mutex = {sem = { > semcount = 1, flags = 0 '\000', waitlist = {head = 0x0, > tail = 0x0}}, holder = -1}, count = 0}, fs_fd = 1, > fs_bufstart = 0x38004184 "", fs_bufend = 0x380041c4 "\006", > fs_bufpos = 0x38004184 "", fs_bufread = 0x38004184 "", > fs_buffer = '\000' <repeats 63 times>, fs_oflags = 6, fs_flags > = 12 '\f', fs_nungotten = 0 '\000', fs_ungotten = "\000"}, {fs_next = 0x0, > fs_lock = {mutex = {sem = {semcount = 1, flags = 0 '\000', > waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, count = 0}, fs_fd = 2, > fs_bufstart = 0x380041f8 "", fs_bufend = 0x38004238 "\006", > 
fs_bufpos = 0x380041f8 "", fs_bufread = 0x380041f8 "", > fs_buffer = '\000' <repeats 63 times>, fs_oflags = 6, fs_flags > = 12 '\f', fs_nungotten = 0 '\000', fs_ungotten = "\000"}}, sl_head = 0x0, > sl_tail = 0x0}} > > We see that argv = 0x38004350, which is in the stack: stack_alloc_ptr = > 0x38004328, stack_base_ptr = 0x38004368 > > (gdb) print (uint32_t[10])*tcb->group->tg_info->argv > $8 = {939541660, 3, 2, 939541660, 939541352, 134362499, 0, 0, 939541660, 3} > (gdb) print (char*[10])*tcb->group->tg_info->argv > $9 = {0x3800449c "pE", 0x3 "", 0x2 "", 0x3800449c "pE", 0x38004368 "", > 0x8023583 <arm_hardfault+78> "F+\340{j\003\360\200C", 0x0, 0x0, > 0x3800449c "pE", 0x3 ""} > > This is not good! Too many non-null values in that array, some of them > are not pointers! The value 3 is found in the BFAR of the crash dump. > > kthread_create was called with argv = NULL > > This calls kthread_create_with_stack with argv = NULL > > This calls nxthread_create with argv = NULL > > This calls nxtask_init with argv = NULL, envp=NULL > > goto sched/task/task_init.c > > This goes to nxtask_setup_arguments with argv=NULL > > goto sched/task/task_setup.c > > This calls nxtask_setup_stackargs > > Looks like this function does the right thing. > > I should get argc=0, allocation of two pointers, one for the task name > and one for the final null arg. > > > Then what is happening? Why do I see invalid values in the argv array? > > > Can someone help me? Can someone try to reproduce this? > > I have built this image with a make distclean, apply config, make, so I > don't expect dangling files. > > > > Thanks, > > Sebastien > > >