This has been a requested feature for a long time [1][2].

/proc/<pid> and /proc/<tgid>/task/<pid> will show POLLIN | POLLRDNORM
when the <pid> is dead and POLLIN | POLLRDNORM | POLLERR when <pid> is
dead and reaped.

The ability to tell whether the task exists given an fd isn't new -- readdir
can do it.  The ability to distinguish live and zombie tasks by fd may
have minor security implications.

It's conceivable, although unlikely, that some existing software expects
directories in /proc to always have POLLIN set.  The benefit of using POLLIN
instead of something like POLLPRI is feature detection -- a live task's
/proc/self now polls as 0 instead of the directory default mask, so checking
/proc/self for the absence of POLLIN reliably detects this feature.

[1] http://lwn.net/Articles/462177/
[2] http://0pointer.de/blog/projects/plumbers-wishlist-3.html

Signed-off-by: Andy Lutomirski <l...@amacapital.net>
---
 fs/proc/base.c            | 26 ++++++++++++++++++++++++++
 include/linux/init_task.h |  2 ++
 include/linux/sched.h     |  3 +++
 kernel/exit.c             |  3 +++
 kernel/fork.c             |  4 ++++
 5 files changed, 38 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3c231ad..ebab7ec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2737,9 +2737,34 @@ static int proc_tgid_base_readdir(struct file * filp,
                                   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
 }
 
+static unsigned int proc_task_base_poll(struct file *filp,
+                                       struct poll_table_struct *wait)
+{
+       struct task_struct *task;
+       int retval;
+
+       task = get_proc_task(filp->f_path.dentry->d_inode);
+       if (!task)
+               return POLLIN | POLLRDNORM | POLLERR;
+
+       read_lock(&tasklist_lock);
+       poll_wait(filp, &task->detach_wqh, wait);
+       if (task_is_dead(task)) {
+               retval = POLLIN | POLLRDNORM;
+       } else {
+               retval = 0;
+               poll_wait(filp, &task->exit_wqh, wait);
+       }
+       read_unlock(&tasklist_lock);
+
+       put_task_struct(task);
+       return retval;
+}
+
 static const struct file_operations proc_tgid_base_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_tgid_base_readdir,
+       .poll           = proc_task_base_poll,
        .llseek         = default_llseek,
 };
 
@@ -3110,6 +3135,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
 static const struct file_operations proc_tid_base_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_tid_base_readdir,
+       .poll           = proc_task_base_poll,
        .llseek         = default_llseek,
 };
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5..093379e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -202,6 +202,8 @@ extern struct task_group root_task_group;
                [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),            \
        },                                                              \
        .thread_group   = LIST_HEAD_INIT(tsk.thread_group),             \
+       .exit_wqh       = __WAIT_QUEUE_HEAD_INITIALIZER(tsk.exit_wqh),  \
+       .detach_wqh     = __WAIT_QUEUE_HEAD_INITIALIZER(tsk.detach_wqh),\
        INIT_IDS                                                        \
        INIT_PERF_EVENTS(tsk)                                           \
        INIT_TRACE_IRQFLAGS                                             \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a0..6034a37 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1548,6 +1548,9 @@ struct task_struct {
 #ifdef CONFIG_UPROBES
        struct uprobe_task *utask;
 #endif
+
+       /* These are woken with tasklist_lock held. */
+       wait_queue_head_t exit_wqh, detach_wqh;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/exit.c b/kernel/exit.c
index 346616c..01c584b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -212,6 +212,7 @@ repeat:
                        leader->exit_state = EXIT_DEAD;
        }
 
+       wake_up_all(&p->detach_wqh);
        write_unlock_irq(&tasklist_lock);
        release_thread(p);
        call_rcu(&p->rcu, delayed_put_task_struct);
@@ -775,6 +776,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
        /* mt-exec, de_thread() is waiting for group leader */
        if (unlikely(tsk->signal->notify_count < 0))
                wake_up_process(tsk->signal->group_exit_task);
+
+       wake_up_all(&tsk->exit_wqh);
        write_unlock_irq(&tasklist_lock);
 
        /* If the process is dead, release it - nobody will wait for it */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7..356b32c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -235,6 +235,8 @@ void __put_task_struct(struct task_struct *tsk)
        WARN_ON(!tsk->exit_state);
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
+       WARN_ON(waitqueue_active(&tsk->exit_wqh));
+       WARN_ON(waitqueue_active(&tsk->detach_wqh));
 
        security_task_free(tsk);
        exit_creds(tsk);
@@ -1285,6 +1287,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->memcg_batch.do_batch = 0;
        p->memcg_batch.memcg = NULL;
 #endif
+       init_waitqueue_head(&p->exit_wqh);
+       init_waitqueue_head(&p->detach_wqh);
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p);
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to