On Fri 01-06-18 09:53:09, Eric W. Biederman wrote:
[...]
> +static int mem_cgroup_mm_can_attach(struct cgroup_taskset *tset)
> +{
> +     struct cgroup_subsys_state *css, *tcss;
> +     struct task_struct *p, *t;
> +     struct mm_struct *mm = NULL;
> +     int ret = -EINVAL;
> +
> +     /*
> +      * Ensure all references for every mm can be accounted for by
> +      * tasks that are being migrated.
> +      */
> +     rcu_read_lock();
> +     cgroup_taskset_for_each(p, css, tset) {
> +             int mm_users, mm_count;
> +
> +             /* Does this task have an mm that has not been visited? */
> +             if (!p->mm || (p->flags & PF_KTHREAD) || (p->mm == mm))
> +                     continue;
> +
> +             mm = p->mm;
> +             mm_users = atomic_read(&mm->mm_users);
> +             if (mm_users == 1)
> +                     continue;
> +
> +             mm_count = 0;
> +             cgroup_taskset_for_each(t, tcss, tset) {
> +                     if (t->mm != mm)
> +                             continue;
> +                     mm_count++;
> +             }
> +             /*
> +              * If there are no non-task references mm_users will
> +              * be stable as holding cgroup_thread_rwsem for write
> +              * blocks fork and exit.
> +              */
> +             if (mm_count != mm_users)
> +                     goto out;

Wouldn't it be much more simple to do something like this instead? Sure
it will break migration of non-thread tasks sharing mms but I would like
to see that this actually matters to anybody rather than make the code
more complicated and maintain it forever without a good reason. So yes
this is a bit harsh but considering that the migration suceeded silently
and that was simply broken as well, I am not sure this is any worse.

Btw. MMF_ALIEN_MM could be used in the OOM proper as well.

diff --git a/kernel/fork.c b/kernel/fork.c
index dfe8e14c0fba..285cd0397307 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1303,6 +1303,8 @@ static int copy_mm(unsigned long clone_flags, struct 
task_struct *tsk)
        if (clone_flags & CLONE_VM) {
                mmget(oldmm);
                mm = oldmm;
+               if (unlikely(!(clone_flags & CLONE_THREAD)))
+                       set_bit(MMF_ALIEN_MM, &mm->flags);
                goto good_mm;
        }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2f5dac193431..fa0248fcdb36 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5103,6 +5103,18 @@ static int mem_cgroup_can_attach(struct cgroup_taskset 
*tset)
        if (!mm)
                return 0;
 
+       /*
+        * Migrating a task which shares mm with other thread group
+        * has never been really well defined. Shout to the log when
+        * this happens and see whether anybody really complains.
+        * If so make sure to support migration if all processes sharing
+        * this mm are migrating together.
+        */
+       if (WARN_ON_ONCE(test_bit(MMF_ALIEN_MM, &mm->flags))) {
+               mmput(mm);
+               return -EBUSY;
+       }
+
        /* We move charges except for creative uses of CLONE_VM */
        if (mm->memcg == from) {
                VM_BUG_ON(mc.from);

-- 
Michal Hocko
SUSE Labs

Reply via email to