Problem with ps and kthread: Reproducible crash
Hello, I have an stm32h7 board, based on the stm32h743zi2 nucleo. I have activated the watchdog. The method used to create the kthread was copied from this file: https://github.com/apache/nuttx/blob/master/boards/arm/stm32/photon/src/stm32_wdt.c#LL146C7-L146C7 The watchdog works and the system is stable. But now, the nsh 'ps' command ends up with a crash when trying to list the kthread that resets the watchdog. The problem has appeared between current trunk and revision 13d823f30710e6fabd3d6429a03bc37e1086c9e7. Here is the GDB session, a bit after cmd_ps is entered: (pid 4 is the watchdog maintenance kthread created like in the file above) open (path=0x3800c770 "/proc/4/cmdline", oflags=1) at vfs/fs_open.c:447 447 if (fd < 0) (gdb) n 453 leave_cancellation_point(); (gdb) 454 return fd; (gdb) 455 } (gdb) nsh_readfile (vtbl=0x38005688, cmd=0x807ab7c "ps", filepath=0x3800c770 "/proc/4/cmdline", buffer=0x380056b0 " 0.0%", buflen=512) at nsh_fsutils.c:219 219 if (fd < 0) (gdb) print fd $1 = 4 (gdb) n 229 ntotal = 0; /* No bytes read yet */ (gdb) 230 *buffer = '\0'; /* NUL terminate the empty buffer */ (gdb) 231 bufptr = buffer; /* Working pointer */ (gdb) 232 remaining = buflen - 1; /* Reserve one byte for a NUL terminator */ (gdb) 233 ret = ERROR; /* Assume failure */ (gdb) 237 nread = read(fd, bufptr, remaining); (gdb) s read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:166 166 enter_cancellation_point(); (gdb) n 170 ret = nx_read(fd, buf, nbytes); (gdb) s nx_read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:132 132 ret = (ssize_t)fs_getfilep(fd, &filep); (gdb) n 133 if (ret < 0) (gdb) 140 return file_read(filep, buf, nbytes); (gdb) s file_read (filep=0x38002e40, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:67 67 int ret = -EBADF; (gdb) n 69 DEBUGASSERT(filep); (gdb) 70 inode = filep->f_inode; (gdb) 74 if ((filep->f_oflags & O_RDOK) == 0) (gdb) 85 else if (inode != NULL && inode->u.i_ops && inode->u.i_ops->read) (gdb) 92 ret = (int)inode->u.i_ops->read(filep, (gdb) s 
procfs_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at procfs/fs_procfs.c:445 445 handler = (FAR struct procfs_file_s *)filep->f_priv; (gdb) n 446 DEBUGASSERT(handler); (gdb) 450 return handler->procfsentry->ops->read(filep, buffer, buflen); (gdb) s proc_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at procfs/fs_procfsproc.c:1552 1552 procfile = (FAR struct proc_file_s *)filep->f_priv; (gdb) n 1553 DEBUGASSERT(procfile != NULL); (gdb) 1557 tcb = nxsched_get_tcb(procfile->pid); (gdb) 1558 if (tcb == NULL) (gdb) print tcb $2 = (struct tcb_s *) 0x38003f28 (gdb) print *tcb $3 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4, sched_priority = 100 'd', init_priority = 100 'd', start = 0x802fc91 , entry = {pthread = 0x806ecf1 , main = 0x806ecf1 }, task_state = 6 '\006', flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice = 200, waitdog = {next = 0x38005a40, arg = 939540264, func = 0x802ecdd , lag = 316}, adj_stack_size = 960, stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0, sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}}, sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0, tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2 '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0}, si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0, saved_regs = 0x0, regs = 0x3800449c}, name = "watchdog\000\000\000\000\000\000\000\000"} (gdb) n 1566 switch (procfile->node->node) (gdb) n 1573 ret = proc_cmdline(procfile, tcb, buffer, buflen, filep->f_pos); (gdb) s proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28, buffer=0x380056b0 "", buflen=511, offset=0) at procfs/fs_procfsproc.c:664 664 remaining = buflen; (gdb) n 665 totalsize = 0; (gdb) 670 name = tcb->name; (gdb) 674 linesize = strlen(name); (gdb) 675 memcpy(procfile->line, name, linesize); (gdb) print linesize $4 = 8 (gdb) n 676 copysize = procfs_memcpy(procfile->line, linesize, buffer, 
remaining, (gdb) n 679 totalsize += copysize; (gdb) print copysize $5 = 8 (gdb) n 680 buffer += copysize; (gdb) n 681 remaining -= copysize; (gdb) n 683 if (totalsize >= buflen) (gdb) print remaining $6 = 503 (gdb) n 690 linesize = group_argvstr(tcb, procfile->line, remaining); (gdb) s group_argvstr (tcb=0x38003f28, args=0x3800caac "watchdog", size=503) at group/group_argvstr.c:61 61 size_t n = 0; (gdb) n 68 if (!tcb || !tcb->group || !tcb->group->tg_info) (gdb) 84 if ((tcb->flags & TCB_FLAG_TTYPE_MASK) == TCB_FLAG_TTYPE_PTHREAD) (gdb) 93 FAR char **argv = tcb->group->tg_info->argv + 1
Re: Problem with ps and kthread: Reproducible crash
Have you tried to increase the stack size for NSH ? Stack overflow in NSH can cause some really weird hard faults, not that easy to diagnose. czw., 11 maj 2023 o 18:34 Sebastien Lorquet napisał(a): > Hello, > > I have a stm32h7 board, based on the stm32h743zi2 nucleo. > > I have activated the watchdog. > > The method to do the kthread was copied from this : > > > https://github.com/apache/nuttx/blob/master/boards/arm/stm32/photon/src/stm32_wdt.c#LL146C7-L146C7 > > The watchdog works, the system is stable > > > But now, the nsh 'ps' command ends up with a crash when trying to list > the kthread that resets the watchdog. > > > The problem has appeared between current trunk and revision > 13d823f30710e6fabd3d6429a03bc37e1086c9e7 > > > Here is the GDB session, a bit after cmd_ps is entered: > > (pid 4 is the watchog maintenance kthread created like in the file above) > > > open (path=0x3800c770 "/proc/4/cmdline", oflags=1) at vfs/fs_open.c:447 > 447 if (fd < 0) > (gdb) n > 453 leave_cancellation_point(); > (gdb) > 454 return fd; > (gdb) > 455 } > (gdb) > nsh_readfile (vtbl=0x38005688, cmd=0x807ab7c "ps", filepath=0x3800c770 > "/proc/4/cmdline", buffer=0x380056b0 " 0.0%", buflen=512) > at nsh_fsutils.c:219 > 219 if (fd < 0) > (gdb) print fd > $1 = 4 > (gdb) n > 229 ntotal= 0; /* No bytes read yet */ > (gdb) > 230 *buffer = '\0'; /* NUL terminate the empty buffer */ > (gdb) > 231 bufptr= buffer; /* Working pointer */ > (gdb) > 232 remaining = buflen - 1; /* Reserve one byte for a NUL > terminator */ > (gdb) > 233 ret = ERROR; /* Assume failure */ > (gdb) > 237 nread = read(fd, bufptr, remaining); > (gdb) s > read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:166 > 166 enter_cancellation_point(); > (gdb) n > 170 ret = nx_read(fd, buf, nbytes); > (gdb) s > nx_read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:132 > 132 ret = (ssize_t)fs_getfilep(fd, &filep); > (gdb) n > 133 if (ret < 0) > (gdb) > 140 return file_read(filep, buf, nbytes); > (gdb) s > file_read 
(filep=0x38002e40, buf=0x380056b0, nbytes=511) at > vfs/fs_read.c:67 > 67int ret = -EBADF; > (gdb) n > 69DEBUGASSERT(filep); > (gdb) > 70inode = filep->f_inode; > (gdb) > 74if ((filep->f_oflags & O_RDOK) == 0) > (gdb) > 85else if (inode != NULL && inode->u.i_ops && inode->u.i_ops->read) > (gdb) > 92ret = (int)inode->u.i_ops->read(filep, > (gdb) s > procfs_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at > procfs/fs_procfs.c:445 > 445 handler = (FAR struct procfs_file_s *)filep->f_priv; > (gdb) n > 446 DEBUGASSERT(handler); > (gdb) > 450 return handler->procfsentry->ops->read(filep, buffer, buflen); > (gdb) s > proc_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at > procfs/fs_procfsproc.c:1552 > 1552 procfile = (FAR struct proc_file_s *)filep->f_priv; > (gdb) n > 1553 DEBUGASSERT(procfile != NULL); > (gdb) > 1557 tcb = nxsched_get_tcb(procfile->pid); > (gdb) > 1558 if (tcb == NULL) > (gdb) print tcb > $2 = (struct tcb_s *) 0x38003f28 > (gdb) print *tcb > $3 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4, > sched_priority = 100 'd', init_priority = 100 'd', >start = 0x802fc91 , entry = {pthread = 0x806ecf1 > , main = 0x806ecf1 }, task_state = 6 '\006', >flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice = > 200, waitdog = {next = 0x38005a40, arg = 939540264, > func = 0x802ecdd , lag = 316}, adj_stack_size = 960, > stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0, >sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}}, > sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0, > tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2 > '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0}, > si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0, > saved_regs = 0x0, regs = 0x3800449c}, >name = "watchdog\000\000\000\000\000\000\000\000"} > (gdb) n > 1566 switch (procfile->node->node) > (gdb) n > 1573 ret = proc_cmdline(procfile, tcb, 
buffer, buflen, > filep->f_pos); > (gdb) s > proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28, buffer=0x380056b0 "", > buflen=511, offset=0) at procfs/fs_procfsproc.c:664 > 664 remaining = buflen; > (gdb) n > 665 totalsize = 0; > (gdb) > 670 name = tcb->name; > (gdb) > 674 linesize = strlen(name); > (gdb) > 675 memcpy(procfile->line, name, linesize); > (gdb) print linesize > $4 = 8 > (gdb) n > 676 copysize = procfs_memcpy(procfile->line, linesize, buffer, > remaining, > (gdb) n > 679 totalsize += copysize; > (gdb) print copysize > $5 = 8 > (gdb) n > 680 buffer+= copysize; > (gdb) n > 681 remaining -= copysize; > (gdb) n > 683 if (totalsize
Re: Problem with ps and kthread: Reproducible crash
The stack is a good candidate: I see that I reported stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, but at the crash we have: R2: 38003a1c, SP: 380038c0. I will try that tomorrow, thanks for the idea. Sebastien On 5/11/23 19:39, raiden00pl wrote: Have you tried to increase the stack size for NSH ? Stack overflow in NSH can cause some really weird hard faults, not that easy to diagnose. czw., 11 maj 2023 o 18:34 Sebastien Lorquet napisał(a): Hello, I have a stm32h7 board, based on the stm32h743zi2 nucleo. I have activated the watchdog. The method to do the kthread was copied from this : https://github.com/apache/nuttx/blob/master/boards/arm/stm32/photon/src/stm32_wdt.c#LL146C7-L146C7 The watchdog works, the system is stable But now, the nsh 'ps' command ends up with a crash when trying to list the kthread that resets the watchdog. The problem has appeared between current trunk and revision 13d823f30710e6fabd3d6429a03bc37e1086c9e7 Here is the GDB session, a bit after cmd_ps is entered: (pid 4 is the watchog maintenance kthread created like in the file above) open (path=0x3800c770 "/proc/4/cmdline", oflags=1) at vfs/fs_open.c:447 447 if (fd < 0) (gdb) n 453 leave_cancellation_point(); (gdb) 454 return fd; (gdb) 455 } (gdb) nsh_readfile (vtbl=0x38005688, cmd=0x807ab7c "ps", filepath=0x3800c770 "/proc/4/cmdline", buffer=0x380056b0 " 0.0%", buflen=512) at nsh_fsutils.c:219 219 if (fd < 0) (gdb) print fd $1 = 4 (gdb) n 229 ntotal= 0; /* No bytes read yet */ (gdb) 230 *buffer = '\0'; /* NUL terminate the empty buffer */ (gdb) 231 bufptr= buffer; /* Working pointer */ (gdb) 232 remaining = buflen - 1; /* Reserve one byte for a NUL terminator */ (gdb) 233 ret = ERROR; /* Assume failure */ (gdb) 237 nread = read(fd, bufptr, remaining); (gdb) s read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:166 166 enter_cancellation_point(); (gdb) n 170 ret = nx_read(fd, buf, nbytes); (gdb) s nx_read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:132 132 ret = 
(ssize_t)fs_getfilep(fd, &filep); (gdb) n 133 if (ret < 0) (gdb) 140 return file_read(filep, buf, nbytes); (gdb) s file_read (filep=0x38002e40, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:67 67int ret = -EBADF; (gdb) n 69DEBUGASSERT(filep); (gdb) 70inode = filep->f_inode; (gdb) 74if ((filep->f_oflags & O_RDOK) == 0) (gdb) 85else if (inode != NULL && inode->u.i_ops && inode->u.i_ops->read) (gdb) 92ret = (int)inode->u.i_ops->read(filep, (gdb) s procfs_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at procfs/fs_procfs.c:445 445 handler = (FAR struct procfs_file_s *)filep->f_priv; (gdb) n 446 DEBUGASSERT(handler); (gdb) 450 return handler->procfsentry->ops->read(filep, buffer, buflen); (gdb) s proc_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at procfs/fs_procfsproc.c:1552 1552 procfile = (FAR struct proc_file_s *)filep->f_priv; (gdb) n 1553 DEBUGASSERT(procfile != NULL); (gdb) 1557 tcb = nxsched_get_tcb(procfile->pid); (gdb) 1558 if (tcb == NULL) (gdb) print tcb $2 = (struct tcb_s *) 0x38003f28 (gdb) print *tcb $3 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4, sched_priority = 100 'd', init_priority = 100 'd', start = 0x802fc91 , entry = {pthread = 0x806ecf1 , main = 0x806ecf1 }, task_state = 6 '\006', flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice = 200, waitdog = {next = 0x38005a40, arg = 939540264, func = 0x802ecdd , lag = 316}, adj_stack_size = 960, stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0, sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}}, sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0, tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2 '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0}, si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0, saved_regs = 0x0, regs = 0x3800449c}, name = "watchdog\000\000\000\000\000\000\000\000"} (gdb) n 1566 switch (procfile->node->node) (gdb) n 1573 ret = 
proc_cmdline(procfile, tcb, buffer, buflen, filep->f_pos); (gdb) s proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28, buffer=0x380056b0 "", buflen=511, offset=0) at procfs/fs_procfsproc.c:664 664 remaining = buflen; (gdb) n 665 totalsize = 0; (gdb) 670 name = tcb->name; (gdb) 674 linesize = strlen(name); (gdb) 675 memcpy(procfile->line, name, linesize); (gdb) print linesize $4 = 8 (gdb) n 676 copysize = procfs_memcpy(procfile->line, linesize, buffer, remaining, (gdb) n 679 totalsize += copysize; (gdb) print copysize $5 = 8 (gdb) n 680 buffer+= copysize; (gdb) n 681 remaining -= copysize; (gdb) n 683 if (totalsize >= bufl