Commit-ID:  39d6cb39a81744473e13c693a9f988a9e342018b
Gitweb:     http://git.kernel.org/tip/39d6cb39a81744473e13c693a9f988a9e342018b
Author:     Peter Zijlstra <a.p.zijls...@chello.nl>
AuthorDate: Tue, 17 Jul 2012 22:54:51 +0200
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 26 Sep 2012 11:48:35 +0200

mm/mpol: Use special PROT_NONE to migrate pages

Combine our previous PROT_NONE, mpol_misplaced() and
migrate_misplaced_page() pieces into an effective migrate-on-fault
scheme.

Suggested-by: Rik van Riel <r...@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijls...@chello.nl>
Reviewed-by: Rik van Riel <r...@redhat.com>
Cc: Paul Turner <p...@google.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Andrew Morton <a...@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-e98gyl8kr9jzooh2s4piu...@git.kernel.org
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 mm/huge_memory.c |   41 ++++++++++++++++++++++++++++++++++++++++-
 mm/memory.c      |   42 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5d7b114..a147d29 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -17,6 +17,7 @@
 #include <linux/khugepaged.h>
 #include <linux/freezer.h>
 #include <linux/mman.h>
+#include <linux/migrate.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -766,12 +767,48 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
                           unsigned int flags, pmd_t entry)
 {
        unsigned long haddr = address & HPAGE_PMD_MASK;
+       struct page *page = NULL;
+       int node;
 
        spin_lock(&mm->page_table_lock);
        if (unlikely(!pmd_same(*pmd, entry)))
                goto out_unlock;
 
-       /* do fancy stuff */
+       if (unlikely(pmd_trans_splitting(entry))) {
+               spin_unlock(&mm->page_table_lock);
+               wait_split_huge_page(vma->anon_vma, pmd);
+               return;
+       }
+
+#ifdef CONFIG_NUMA
+       page = pmd_page(entry);
+       VM_BUG_ON(!PageCompound(page) || !PageHead(page));
+
+       get_page(page);
+       spin_unlock(&mm->page_table_lock);
+
+       /*
+        * XXX should we serialize against split_huge_page ?
+        */
+
+       node = mpol_misplaced(page, vma, haddr);
+       if (node == -1)
+               goto do_fixup;
+
+       /*
+        * Due to lacking code to migrate thp pages, we'll split
+        * (which preserves the special PROT_NONE) and re-take the
+        * fault on the normal pages.
+        */
+       split_huge_page(page);
+       put_page(page);
+       return;
+
+do_fixup:
+       spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(*pmd, entry)))
+               goto out_unlock;
+#endif
 
        /* change back to regular protection */
        entry = pmd_modify(entry, vma->vm_page_prot);
@@ -780,6 +817,8 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
 
 out_unlock:
        spin_unlock(&mm->page_table_lock);
+       if (page)
+               put_page(page);
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
diff --git a/mm/memory.c b/mm/memory.c
index bea2ed5..d896a24 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3441,17 +3442,42 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long address, pte_t *ptep, pmd_t *pmd,
                        unsigned int flags, pte_t entry)
 {
+       struct page *page = NULL;
        spinlock_t *ptl;
-       int ret = 0;
+       int node;
 
-       if (!pte_unmap_same(mm, pmd, ptep, entry))
-               goto out;
+       ptl = pte_lockptr(mm, pmd);
+       spin_lock(ptl);
+       if (unlikely(!pte_same(*ptep, entry)))
+               goto unlock;
 
+#ifdef CONFIG_NUMA
        /*
-        * Do fancy stuff...
+        * For NUMA systems we use the special PROT_NONE maps to drive
+        * lazy page migration, see MPOL_MF_LAZY and related.
         */
+       page = vm_normal_page(vma, address, entry);
+       if (!page)
+               goto do_fixup_locked;
+
+       get_page(page);
+       pte_unmap_unlock(ptep, ptl);
+
+       node = mpol_misplaced(page, vma, address);
+       if (node == -1)
+               goto do_fixup;
 
        /*
+        * Page migration will install a new pte with vma->vm_page_prot,
+        * otherwise fall-through to the fixup. Next time,.. perhaps.
+        */
+       if (!migrate_misplaced_page(mm, page, node)) {
+               put_page(page);
+               return 0;
+       }
+
+do_fixup:
+       /*
         * OK, nothing to do,.. change the protection back to what it
         * ought to be.
         */
@@ -3459,6 +3485,9 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
        if (unlikely(!pte_same(*ptep, entry)))
                goto unlock;
 
+do_fixup_locked:
+#endif /* CONFIG_NUMA */
+
        flush_cache_page(vma, address, pte_pfn(entry));
 
        ptep_modify_prot_start(mm, address, ptep);
@@ -3468,8 +3497,9 @@ static int do_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
        update_mmu_cache(vma, address, ptep);
 unlock:
        pte_unmap_unlock(ptep, ptl);
-out:
-       return ret;
+       if (page)
+               put_page(page);
+       return 0;
 }
 
 /*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to