Have you tried increasing the stack size for NSH? A stack overflow in NSH
can cause some really weird hard faults that are not easy to diagnose.


czw., 11 maj 2023 o 18:34 Sebastien Lorquet <sebast...@lorquet.fr>
napisał(a):

> Hello,
>
> I have a stm32h7 board, based on the stm32h743zi2 nucleo.
>
> I have activated the watchdog.
>
> The method to do the kthread was copied from this :
>
>
> https://github.com/apache/nuttx/blob/master/boards/arm/stm32/photon/src/stm32_wdt.c#LL146C7-L146C7
>
> The watchdog works, the system is stable
>
>
> But now, the nsh 'ps' command ends up with a crash when trying to list
> the kthread that resets the watchdog.
>
>
> The problem has appeared between current trunk and revision
> 13d823f30710e6fabd3d6429a03bc37e1086c9e7
>
>
> Here is the GDB session, a bit after cmd_ps is entered:
>
> (pid 4 is the watchdog maintenance kthread created like in the file above)
>
>
> open (path=0x3800c770 "/proc/4/cmdline", oflags=1) at vfs/fs_open.c:447
> 447       if (fd < 0)
> (gdb) n
> 453       leave_cancellation_point();
> (gdb)
> 454       return fd;
> (gdb)
> 455     }
> (gdb)
> nsh_readfile (vtbl=0x38005688, cmd=0x807ab7c "ps", filepath=0x3800c770
> "/proc/4/cmdline", buffer=0x380056b0 "  0.0%", buflen=512)
>      at nsh_fsutils.c:219
> 219       if (fd < 0)
> (gdb) print fd
> $1 = 4
> (gdb) n
> 229       ntotal    = 0;          /* No bytes read yet */
> (gdb)
> 230       *buffer   = '\0';       /* NUL terminate the empty buffer */
> (gdb)
> 231       bufptr    = buffer;     /* Working pointer */
> (gdb)
> 232       remaining = buflen - 1; /* Reserve one byte for a NUL
> terminator */
> (gdb)
> 233       ret       = ERROR;      /* Assume failure */
> (gdb)
> 237           nread = read(fd, bufptr, remaining);
> (gdb) s
> read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:166
> 166       enter_cancellation_point();
> (gdb) n
> 170       ret = nx_read(fd, buf, nbytes);
> (gdb) s
> nx_read (fd=4, buf=0x380056b0, nbytes=511) at vfs/fs_read.c:132
> 132       ret = (ssize_t)fs_getfilep(fd, &filep);
> (gdb) n
> 133       if (ret < 0)
> (gdb)
> 140       return file_read(filep, buf, nbytes);
> (gdb) s
> file_read (filep=0x38002e40, buf=0x380056b0, nbytes=511) at
> vfs/fs_read.c:67
> 67        int ret = -EBADF;
> (gdb) n
> 69        DEBUGASSERT(filep);
> (gdb)
> 70        inode = filep->f_inode;
> (gdb)
> 74        if ((filep->f_oflags & O_RDOK) == 0)
> (gdb)
> 85        else if (inode != NULL && inode->u.i_ops && inode->u.i_ops->read)
> (gdb)
> 92            ret = (int)inode->u.i_ops->read(filep,
> (gdb) s
> procfs_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at
> procfs/fs_procfs.c:445
> 445       handler = (FAR struct procfs_file_s *)filep->f_priv;
> (gdb) n
> 446       DEBUGASSERT(handler);
> (gdb)
> 450       return handler->procfsentry->ops->read(filep, buffer, buflen);
> (gdb) s
> proc_read (filep=0x38002e40, buffer=0x380056b0 "", buflen=511) at
> procfs/fs_procfsproc.c:1552
> 1552      procfile = (FAR struct proc_file_s *)filep->f_priv;
> (gdb) n
> 1553      DEBUGASSERT(procfile != NULL);
> (gdb)
> 1557      tcb = nxsched_get_tcb(procfile->pid);
> (gdb)
> 1558      if (tcb == NULL)
> (gdb) print tcb
> $2 = (struct tcb_s *) 0x38003f28
> (gdb) print *tcb
> $3 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4,
> sched_priority = 100 'd', init_priority = 100 'd',
>    start = 0x802fc91 <nxtask_start>, entry = {pthread = 0x806ecf1
> <wdog_daemon>, main = 0x806ecf1 <wdog_daemon>}, task_state = 6 '\006',
>    flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice =
> 200, waitdog = {next = 0x38005a40, arg = 939540264,
>      func = 0x802ecdd <nxsig_timeout>, lag = 316}, adj_stack_size = 960,
> stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0,
>    sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}},
> sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0,
>      tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2
> '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0},
>      si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0,
> saved_regs = 0x0, regs = 0x3800449c},
>    name = "watchdog\000\000\000\000\000\000\000\000"}
> (gdb) n
> 1566      switch (procfile->node->node)
> (gdb) n
> 1573          ret = proc_cmdline(procfile, tcb, buffer, buflen,
> filep->f_pos);
> (gdb) s
> proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28, buffer=0x380056b0 "",
> buflen=511, offset=0) at procfs/fs_procfsproc.c:664
> 664       remaining = buflen;
> (gdb) n
> 665       totalsize = 0;
> (gdb)
> 670       name       = tcb->name;
> (gdb)
> 674       linesize   = strlen(name);
> (gdb)
> 675       memcpy(procfile->line, name, linesize);
> (gdb) print linesize
> $4 = 8
> (gdb) n
> 676       copysize   = procfs_memcpy(procfile->line, linesize, buffer,
> remaining,
> (gdb) n
> 679       totalsize += copysize;
> (gdb) print copysize
> $5 = 8
> (gdb) n
> 680       buffer    += copysize;
> (gdb) n
> 681       remaining -= copysize;
> (gdb) n
> 683       if (totalsize >= buflen)
> (gdb) print remaining
> $6 = 503
> (gdb) n
> 690       linesize   = group_argvstr(tcb, procfile->line, remaining);
> (gdb) s
> group_argvstr (tcb=0x38003f28, args=0x3800caac "watchdog", size=503) at
> group/group_argvstr.c:61
> 61        size_t n = 0;
> (gdb) n
> 68        if (!tcb || !tcb->group || !tcb->group->tg_info)
> (gdb)
> 84        if ((tcb->flags & TCB_FLAG_TTYPE_MASK) == TCB_FLAG_TTYPE_PTHREAD)
> (gdb)
> 93            FAR char **argv = tcb->group->tg_info->argv + 1;
> (gdb)
> 95            while (*argv != NULL && n < size)
> (gdb)
> 97                n += snprintf(args + n, size - n, " %s", *argv++);
> (gdb) n
>
> *CRASH*
>
> When I just run, the stack dump shows a crash at
>
> Fault:   IRQ: 3 regs: 0x380037ec
> arm_busfault:   BASEPRI: 000000f0 PRIMASK: 00000000 IPSR: 00000003
> CONTROL: 00000000
> arm_busfault:   CFSR: 00008200 HFSR: 40000000 DFSR: 00000000 BFAR:
> 00000003 AFSR: 00000000
> arm_busfault: Bus Fault Reason:
> arm_busfault:   Precise data bus error
> _assert: Current Version: NuttX bca 12.0.0-RC1 2eac660ff6-dirty May 11
> 2023 15:27:06 arm
> _assert: Assertion failed panic: at file: armv7-m/arm_busfault.c:106
> task: nsh_main 0x8039c41
> up_dump_register: R0: 00000003 R1: ffffffff R2: 38003a1c  R3: 00000003
> up_dump_register: R4: 08039e89 R5: 3800571e R6: 38005734  FP: 380038c0
> up_dump_register: R8: 00000000 SB: 00000000 SL: 00000000 R11: 00000000
> up_dump_register: IP: a0000000 SP: 380038c0 LR: 08035537  PC: 080367a8
> up_dump_register: xPSR: a1000000 PRIMASK: 00000000 CONTROL: 00000000
>
> PC=nuttx/libs/libc/string/lib_strnlen.c:37 (discriminator 3)
> LR=nuttx/libs/libc/stdio/lib_libvsprintf.c:940 (discriminator 4)
>
> This is trying to measure the length of the command line
>
> Running again with a breakpoint in proc_cmdline:
>
> Breakpoint 1, proc_cmdline (procfile=0x3800caa0, tcb=0x38003f28,
> buffer=0x380056b0 "", buflen=511, offset=0) at procfs/fs_procfsproc.c:664
> 664       remaining = buflen;
> (gdb) print *tcb
> $4 = {flink = 0x38008640, blink = 0x0, group = 0x38003fe0, pid = 4,
> sched_priority = 100 'd', init_priority = 100 'd',
>    start = 0x802fc91 <nxtask_start>, entry = {pthread = 0x806ecf1
> <wdog_daemon>, main = 0x806ecf1 <wdog_daemon>}, task_state = 6 '\006',
>    flags = 4138, lockcount = 0, cpcount = 0, errcode = 0, timeslice =
> 200, waitdog = {next = 0x0, arg = 939540264,
>      func = 0x802ecdd <nxsig_timeout>, lag = 419}, adj_stack_size = 960,
> stack_alloc_ptr = 0x38004328, stack_base_ptr = 0x38004368, waitobj = 0x0,
>    sigprocmask = {_elem = {0, 0}}, sigwaitmask = {_elem = {0, 0}},
> sigpendactionq = {head = 0x0, tail = 0x0}, sigpostedq = {head = 0x0,
>      tail = 0x0}, sigunbinfo = {si_signo = 255 '\377', si_code = 2
> '\002', si_errno = 110 'n', si_value = {sival_int = 0, sival_ptr = 0x0},
>      si_user = 0x0}, mhead = 0x0, ticks = 0, xcp = {sigdeliver = 0x0,
> saved_regs = 0x0, regs = 0x3800449c},
>    name = "watchdog\000\000\000\000\000\000\000\000"}
>
> (gdb) print *tcb->group
> $5 = {flink = 0x38002b40, tg_pid = 4, tg_ppid = 0, tg_flags = 0 '\000',
> tg_nmembers = 1 '\001', tg_mxmembers = 4 '\004', tg_members = 0x38004078,
>    tg_nwaiters = 0 '\000', tg_waitflags = 0 '\000', tg_exitsem =
> {semcount = 0, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}},
>    tg_statloc = 0x0, tg_joinlock = {sem = {semcount = 1, flags = 0
> '\000', waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, tg_joinhead
> = 0x0,
>    tg_jointail = 0x0, tg_info = 0x38004090, tg_sigactionq = {head = 0x0,
> tail = 0x0}, tg_sigpendingq = {head = 0x0, tail = 0x0}, tg_envp = 0x0,
>    tg_envc = 0, itimer = 0x0, tg_filelist = {fl_lock = {sem = {semcount
> = 1, flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}}, holder = -1},
>      fl_rows = 1 '\001', fl_files = 0x38004250}, tg_mm_map = {mm_map_sq
> = {head = 0x0, tail = 0x0}, mm_map_mutex = {mutex = {sem = {semcount = 1,
>            flags = 0 '\000', waitlist = {head = 0x0, tail = 0x0}},
> holder = -1}, count = 0}}}
>
> (gdb) print *tcb->group->tg_info
> $6 = {ta_lock = {sem = {semcount = 1, flags = 0 '\000', waitlist = {head
> = 0x0, tail = 0x0}}, holder = -1}, argv = 0x38004350,
>    ta_tlsset = 0 '\000', ta_tlsdtor = {0x0, 0x0, 0x0, 0x0}, ta_getopt =
> {go_optarg = 0x0, go_opterr = 0, go_optind = 0, go_optopt = 0,
>      go_optptr = 0x0, go_binitialized = false}, ta_umask = 0,
> ta_streamlist = {sl_lock = {sem = {semcount = 1, flags = 0 '\000',
> waitlist = {
>            head = 0x0, tail = 0x0}}, holder = -1}, sl_std = {{fs_next =
> 0x0, fs_lock = {mutex = {sem = {semcount = 1, flags = 0 '\000',
>                waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, count
> = 0}, fs_fd = 0, fs_bufstart = 0x38004110 "",
>          fs_bufend = 0x38004150 "\001", fs_bufpos = 0x38004110 "",
> fs_bufread = 0x38004110 "", fs_buffer = '\000' <repeats 63 times>,
>          fs_oflags = 1, fs_flags = 12 '\f', fs_nungotten = 0 '\000',
> fs_ungotten = "\000"}, {fs_next = 0x0, fs_lock = {mutex = {sem = {
>                semcount = 1, flags = 0 '\000', waitlist = {head = 0x0,
> tail = 0x0}}, holder = -1}, count = 0}, fs_fd = 1,
>          fs_bufstart = 0x38004184 "", fs_bufend = 0x380041c4 "\006",
> fs_bufpos = 0x38004184 "", fs_bufread = 0x38004184 "",
>          fs_buffer = '\000' <repeats 63 times>, fs_oflags = 6, fs_flags
> = 12 '\f', fs_nungotten = 0 '\000', fs_ungotten = "\000"}, {fs_next = 0x0,
>          fs_lock = {mutex = {sem = {semcount = 1, flags = 0 '\000',
> waitlist = {head = 0x0, tail = 0x0}}, holder = -1}, count = 0}, fs_fd = 2,
>          fs_bufstart = 0x380041f8 "", fs_bufend = 0x38004238 "\006",
> fs_bufpos = 0x380041f8 "", fs_bufread = 0x380041f8 "",
>          fs_buffer = '\000' <repeats 63 times>, fs_oflags = 6, fs_flags
> = 12 '\f', fs_nungotten = 0 '\000', fs_ungotten = "\000"}}, sl_head = 0x0,
>      sl_tail = 0x0}}
>
> We see that argv = 0x38004350, which is in the stack: stack_alloc_ptr =
> 0x38004328, stack_base_ptr = 0x38004368
>
> (gdb) print (uint32_t[10])*tcb->group->tg_info->argv
> $8 = {939541660, 3, 2, 939541660, 939541352, 134362499, 0, 0, 939541660, 3}
> (gdb) print (char*[10])*tcb->group->tg_info->argv
> $9 = {0x3800449c "pE", 0x3 "", 0x2 "", 0x3800449c "pE", 0x38004368 "",
> 0x8023583 <arm_hardfault+78> "F+\340{j\003\360\200C", 0x0, 0x0,
>    0x3800449c "pE", 0x3 ""}
>
> This is not good! Too many non null values in that array, some of them
> are not pointers! The value 3 is found in the BFAR of the crash dump.
>
> kthread_create was called with argv = NULL
>
> This calls kthread_create_with_stack with argv = NULL
>
> This calls nxthread_create with argv = NULL
>
> This calls nxtask_init with argv = NULL, envp=NULL
>
> goto sched/task/task_init.c
>
> This goes to nxtask_setup_arguments with argv=NULL
>
> goto sched/task/task_setup.c
>
> This calls nxtask_setup_stackargs
>
> Looks like this function does the right thing.
>
> I should get argc=0, allocation of two pointers, one for the task name
> and one for the final null arg.
>
>
> Then what is happening? Why do I see invalid values in the argv array?
>
>
> Can someone help me? Can someone try to reproduce this?
>
> I have built this image with a make distclean, apply config, make, so I
> don't expect dangling files.
>
>
>
> Thanks,
>
> Sebastien
>
>
>

Reply via email to