This is an automated email from the ASF dual-hosted git repository. btashton pushed a commit to branch releases/10.0 in repository https://gitbox.apache.org/repos/asf/incubator-nuttx.git
The following commit(s) were added to refs/heads/releases/10.0 by this push: new ed25357 sched: Fix nx_waitid(), nx_waitpid(), nxtask_exithook() for SMP ed25357 is described below commit ed2535733ae9e35f5a9bbda50160af05859a252f Author: Masayuki Ishikawa <masayuki.ishik...@gmail.com> AuthorDate: Thu Nov 12 08:45:36 2020 +0900 sched: Fix nx_waitid(), nx_waitpid(), nxtask_exithook() for SMP Summary: - I noticed waitpid_test stops with lc823450-xgevk:rndis - The condition was CONFIG_DEBUG_ASSERTION=y - Actually, the child task sent SIGCHILD but the parent couldn't catch the signal - Then, I found that nx_waitid(), nx_waitpid() use sched_lock() - However, a parent task and a child task are running on different CPUs - So, sched_lock() is not enough and need to use a critical section - Also, signal handling in nxtask_exithook() must be done in a critical section Impact: - SMP only Testing: - Tested with ostest with the following configurations - lc823450-xgevk:rndis (CONFIG_DEBUG_ASSERTION=y and n) - spresense:smp - spresense:wifi_smp (NCPUS=2 and 4) - sabre-6quad:smp (QEMU) - esp32-core:smp (QEMU) - maix-bit:smp (QEMU) - sim:smp Signed-off-by: Masayuki Ishikawa <masayuki.ishik...@jp.sony.com> --- sched/sched/sched_waitid.c | 13 +++++++++++++ sched/sched/sched_waitpid.c | 34 ++++++++++++++++++++++++++++++---- sched/task/task_exithook.c | 12 +++++++++++- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/sched/sched/sched_waitid.c b/sched/sched/sched_waitid.c index 2b8590d..86ee4b2 100644 --- a/sched/sched/sched_waitid.c +++ b/sched/sched/sched_waitid.c @@ -138,6 +138,14 @@ int nx_waitid(int idtype, id_t id, FAR siginfo_t *info, int options) sigemptyset(&set); nxsig_addset(&set, SIGCHLD); + /* NOTE: sched_lock() is not enough for SMP + * because the child task is running on another CPU + */ + +#ifdef CONFIG_SMP + irqstate_t flags = enter_critical_section(); +#endif + /* Disable pre-emption so that nothing changes while the loop executes */ sched_lock(); @@ -374,6 +382,11 @@ int nx_waitid(int idtype, id_t id, FAR siginfo_t *info, int options) errout: sched_unlock(); + +#ifdef CONFIG_SMP + leave_critical_section(flags); +#endif + return ret; } diff --git a/sched/sched/sched_waitpid.c b/sched/sched/sched_waitpid.c index c33186a..c4674ea 100644 --- a/sched/sched/sched_waitpid.c +++ b/sched/sched/sched_waitpid.c @@ -72,6 +72,14 @@ pid_t nx_waitpid(pid_t pid, int *stat_loc, int options) DEBUGASSERT(stat_loc); + /* NOTE: sched_lock() is not enough for SMP + * because the child task is running on another CPU + */ + +#ifdef CONFIG_SMP + irqstate_t flags = enter_critical_section(); +#endif + /* Disable pre-emption so that nothing changes in the following tests */ sched_lock(); @@ -158,11 +166,15 @@ pid_t nx_waitpid(pid_t pid, int *stat_loc, int options) /* On success, return the PID */ - sched_unlock(); - return pid; + ret = pid; errout: sched_unlock(); + +#ifdef CONFIG_SMP + leave_critical_section(flags); +#endif + return ret; } @@ -199,6 +211,14 @@ pid_t nx_waitpid(pid_t pid, int *stat_loc, int options) sigemptyset(&set); nxsig_addset(&set, SIGCHLD); + /* NOTE: sched_lock() is not enough for SMP + * because the child task is running on another CPU + */ + +#ifdef CONFIG_SMP + irqstate_t flags = enter_critical_section(); +#endif + /* Disable pre-emption so that nothing changes while the loop executes */ sched_lock(); @@ -438,11 +458,17 @@ pid_t nx_waitpid(pid_t pid, int *stat_loc, int options) } } - sched_unlock(); - return pid; + /* On success, return the PID */ + + ret = pid; errout: sched_unlock(); + +#ifdef CONFIG_SMP + leave_critical_section(flags); +#endif + return ret; } #endif /* CONFIG_SCHED_HAVE_PARENT */ diff --git a/sched/task/task_exithook.c b/sched/task/task_exithook.c index 335638e..48cb5c8 100644 --- a/sched/task/task_exithook.c +++ b/sched/task/task_exithook.c @@ -639,7 +639,13 @@ void nxtask_exithook(FAR struct tcb_s *tcb, int status, bool nonblocking) nxtask_recover(tcb); - /* Send the SIGCHILD signal to the parent task group */ + /* NOTE: signal handling needs to be done in a criticl section */ + +#ifdef CONFIG_SMP + irqstate_t flags = enter_critical_section(); +#endif + + /* Send the SIGCHLD signal to the parent task group */ nxtask_signalparent(tcb, status); @@ -673,6 +679,10 @@ void nxtask_exithook(FAR struct tcb_s *tcb, int status, bool nonblocking) nxsig_cleanup(tcb); /* Deallocate Signal lists */ +#ifdef CONFIG_SMP + leave_critical_section(flags); +#endif + /* This function can be re-entered in certain cases. Set a flag * bit in the TCB to not that we have already completed this exit * processing.