A dying mm_struct whose owning task has already exited can remain the
active_mm of kernel threads for as long as no other user task runs on
the CPUs that are using it as active_mm. This prolongs the lifetime of
the dying mm and holds on to memory and other resources that cannot be
freed until the last reference is dropped.

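Fix that by forcing kernel threads to use init_mm as their active_mm
when the previous active_mm is dying, i.e. when its owner is NULL.
init_mm is given init_task as its owner so that it is never treated as
a dying mm itself.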

Signed-off-by: Waiman Long <long...@redhat.com>
---
 kernel/sched/core.c | 13 +++++++++++--
 mm/init-mm.c        |  2 ++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..ca348e1f5a1e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3233,13 +3233,22 @@ context_switch(struct rq *rq, struct task_struct *prev,
         * Both of these contain the full memory barrier required by
         * membarrier after storing to rq->curr, before returning to
         * user-space.
+        *
+        * If mm is NULL and oldmm is dying (!owner), we switch to
+        * init_mm instead to make sure that oldmm can be freed ASAP.
         */
-       if (!mm) {
+       if (!mm && oldmm->owner) {
                next->active_mm = oldmm;
                mmgrab(oldmm);
                enter_lazy_tlb(oldmm, next);
-       } else
+       } else {
+               if (!mm) {
+                       mm = &init_mm;
+                       next->active_mm = mm;
+                       mmgrab(mm);
+               }
                switch_mm_irqs_off(oldmm, mm, next);
+       }
 
        if (!prev->mm) {
                prev->active_mm = NULL;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a787a319211e..5bfc6bc333ca 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -5,6 +5,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/cpumask.h>
+#include <linux/sched/task.h>
 
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
@@ -36,5 +37,6 @@ struct mm_struct init_mm = {
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
        .user_ns        = &init_user_ns,
        .cpu_bitmap     = { [BITS_TO_LONGS(NR_CPUS)] = 0},
+       .owner          = &init_task,
        INIT_MM_CONTEXT(init_mm)
 };
-- 
2.18.1

Reply via email to