The VMA sequence count has been introduced to allow fast detection of
VMA modification when running a page fault handler without holding
the mmap_sem.

This patch provides protection against the VMA modifications done in:
        - madvise()
        - mremap()
        - mpol_rebind_policy()
        - vma_replace_policy()
        - change_prot_numa()
        - mlock(), munlock()
        - mprotect()
        - mmap_region()
        - clear_refs_write()

Signed-off-by: Laurent Dufour <[email protected]>
---
 fs/proc/task_mmu.c |  2 ++
 mm/madvise.c       |  4 ++++
 mm/mempolicy.c     | 10 +++++++++-
 mm/mlock.c         |  9 ++++++---
 mm/mmap.c          |  2 ++
 mm/mprotect.c      |  2 ++
 mm/mremap.c        |  7 +++++++
 7 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..9bc40620ba39 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1069,8 +1069,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                        goto out_mm;
                                }
                                for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                                       write_seqcount_begin(&vma->vm_sequence);
                                        vma->vm_flags &= ~VM_SOFTDIRTY;
                                        vma_set_page_prot(vma);
+                                       write_seqcount_end(&vma->vm_sequence);
                                }
                                downgrade_write(&mm->mmap_sem);
                                break;
diff --git a/mm/madvise.c b/mm/madvise.c
index 25b78ee4fc2c..d1fa6a7ee604 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -172,7 +172,9 @@ static long madvise_behavior(struct vm_area_struct *vma,
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         */
+       write_seqcount_begin(&vma->vm_sequence);
        vma->vm_flags = new_flags;
+       write_seqcount_end(&vma->vm_sequence);
 out:
        return error;
 }
@@ -439,9 +441,11 @@ static void madvise_free_page_range(struct mmu_gather *tlb,
                .private = tlb,
        };
 
+       write_seqcount_begin(&vma->vm_sequence);
        tlb_start_vma(tlb, vma);
        walk_page_range(addr, end, &free_walk);
        tlb_end_vma(tlb, vma);
+       write_seqcount_end(&vma->vm_sequence);
 }
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 37d0b334bfe9..5e44b3e69a0d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -447,8 +447,11 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
        struct vm_area_struct *vma;
 
        down_write(&mm->mmap_sem);
-       for (vma = mm->mmap; vma; vma = vma->vm_next)
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               write_seqcount_begin(&vma->vm_sequence);
                mpol_rebind_policy(vma->vm_policy, new, MPOL_REBIND_ONCE);
+               write_seqcount_end(&vma->vm_sequence);
+       }
        up_write(&mm->mmap_sem);
 }
 
@@ -606,9 +609,11 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 {
        int nr_updated;
 
+       write_seqcount_begin(&vma->vm_sequence);
        nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
        if (nr_updated)
                count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
+       write_seqcount_end(&vma->vm_sequence);
 
        return nr_updated;
 }
@@ -709,6 +714,7 @@ static int vma_replace_policy(struct vm_area_struct *vma,
        if (IS_ERR(new))
                return PTR_ERR(new);
 
+       write_seqcount_begin(&vma->vm_sequence);
        if (vma->vm_ops && vma->vm_ops->set_policy) {
                err = vma->vm_ops->set_policy(vma, new);
                if (err)
@@ -717,10 +723,12 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 
        old = vma->vm_policy;
        vma->vm_policy = new; /* protected by mmap_sem */
+       write_seqcount_end(&vma->vm_sequence);
        mpol_put(old);
 
        return 0;
  err_out:
+       write_seqcount_end(&vma->vm_sequence);
        mpol_put(new);
        return err;
 }
diff --git a/mm/mlock.c b/mm/mlock.c
index b562b5523a65..30d9bfc61929 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -438,7 +438,9 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
 void munlock_vma_pages_range(struct vm_area_struct *vma,
                             unsigned long start, unsigned long end)
 {
+       write_seqcount_begin(&vma->vm_sequence);
        vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+       write_seqcount_end(&vma->vm_sequence);
 
        while (start < end) {
                struct page *page;
@@ -563,10 +565,11 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED, populate_vma_page_range will bring it back.
         */
-
-       if (lock)
+       if (lock) {
+               write_seqcount_begin(&vma->vm_sequence);
                vma->vm_flags = newflags;
-       else
+               write_seqcount_end(&vma->vm_sequence);
+       } else
                munlock_vma_pages_range(vma, start, end);
 
 out:
diff --git a/mm/mmap.c b/mm/mmap.c
index ad85f210a92c..b48bbe6a49c6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1720,6 +1720,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 out:
        perf_event_mmap(vma);
 
+       write_seqcount_begin(&vma->vm_sequence);
        vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
                if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
@@ -1742,6 +1743,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        vma->vm_flags |= VM_SOFTDIRTY;
 
        vma_set_page_prot(vma);
+       write_seqcount_end(&vma->vm_sequence);
 
        return addr;
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8edd0d576254..1db5b0bf6952 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -345,6 +345,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
         * vm_flags and vm_page_prot are protected by the mmap_sem
         * held in write mode.
         */
+       write_seqcount_begin(&vma->vm_sequence);
        vma->vm_flags = newflags;
        dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
        vma_set_page_prot(vma);
@@ -360,6 +361,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
                        (newflags & VM_WRITE)) {
                populate_vma_page_range(vma, start, end, NULL);
        }
+       write_seqcount_end(&vma->vm_sequence);
 
        vm_stat_account(mm, oldflags, -nrpages);
        vm_stat_account(mm, newflags, nrpages);
diff --git a/mm/mremap.c b/mm/mremap.c
index cd8a1b199ef9..9c7f69c9e80f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -300,6 +300,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
        if (!new_vma)
                return -ENOMEM;
 
+       write_seqcount_begin(&vma->vm_sequence);
+       write_seqcount_begin_nested(&new_vma->vm_sequence,
+                                   SINGLE_DEPTH_NESTING);
+
        moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
                                     need_rmap_locks);
        if (moved_len < old_len) {
@@ -316,6 +320,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                 */
                move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
                                 true);
+               write_seqcount_end(&vma->vm_sequence);
                vma = new_vma;
                old_len = new_len;
                old_addr = new_addr;
@@ -324,7 +329,9 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                mremap_userfaultfd_prep(new_vma, uf);
                arch_remap(mm, old_addr, old_addr + old_len,
                           new_addr, new_addr + new_len);
+               write_seqcount_end(&vma->vm_sequence);
        }
+       write_seqcount_end(&new_vma->vm_sequence);
 
        /* Conceal VM_ACCOUNT so old reservation is not undone */
        if (vm_flags & VM_ACCOUNT) {
-- 
2.7.4

Reply via email to