Anon THP's huge pages are split for reclaim in add_to_swap(), before they
reach try_to_unmap(); migrate_misplaced_transhuge_page() does its own pmd
remapping, instead of needing try_to_unmap(); migratable hugetlbfs pages
masquerade as pte-mapped in page_check_address().  So try_to_unmap_one()
did not need to handle transparent pmd mappings as page_referenced_one()
does (beyond the TTU_SPLIT_HUGE_PMD case; though what about TTU_MUNLOCK?).

But tmpfs huge pages are split a little later in the reclaim sequence,
when pageout() calls shmem_writepage(): so try_to_unmap_one() now needs
to handle pmd-mapped pages by using page_check_address_transhuge(), and
a function unmap_team_by_pmd(), which we shall place in huge_memory.c in
a later patch: just a stub for now.
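
In outline, the pmd-mapped path that this adds to try_to_unmap_one()
looks like this (simplified from the hunks below: the pte path and the
intervening mlock checks are elided):

	if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
		goto out;
	...
	if (!pte) {
		/* page_check_address_transhuge() found a huge pmd mapping */
		if (!(flags & TTU_IGNORE_ACCESS) &&
		    IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
		    pmdp_clear_flush_young_notify(vma, address, pmd)) {
			ret = SWAP_FAIL;	/* recently referenced: keep */
			goto out_unmap;
		}
		spin_unlock(ptl);
		unmap_team_by_pmd(vma, address, pmd, page);
		goto out;
	}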

Refine the lookup in page_check_address_transhuge() slightly, to match
what mm_find_pmd() does, and what we've been using for a year: take a pmdval
snapshot of *pmd first, to avoid pmd_lock before the pmd_page check,
with a retry if it changes in between.  Was the code wrong before?
I don't think it was, but I am more comfortable with how it is now.
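
Reduced to its core, the snapshot-and-retry pattern now reads (as in the
hunk below, comments added here for emphasis):

again:
	pmdval = *pmd;
	barrier();	/* test the snapshot, not a reread of *pmd */
	if (!pmd_present(pmdval))
		return false;
	if (pmd_trans_huge(pmdval)) {
		if (pmd_page(pmdval) != page)
			return false;	/* cheap check before pmd_lock */
		ptl = pmd_lock(mm, pmd);
		if (unlikely(!pmd_same(*pmd, pmdval))) {
			spin_unlock(ptl);
			goto again;	/* *pmd changed: fresh snapshot */
		}
		pte = NULL;
		goto found;
	}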

Change its check on hpage_nr_pages() to use compound_order() instead,
for two reasons: one being that there's now a case in anon THP
splitting where the new call to page_check_address_transhuge() may be on
a PageTail, which hits VM_BUG_ON in PageTransHuge in hpage_nr_pages();
the other being that hpage_nr_pages() on PageTeam gets more interesting
in a later patch, and would no longer be appropriate here.
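
For reference, the definitions involved (as I read them in the current
tree) are:

static inline int hpage_nr_pages(struct page *page)
{
	if (unlikely(PageTransHuge(page)))
		return HPAGE_PMD_NR;
	return 1;
}

static inline int PageTransHuge(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);	/* what the new caller hits */
	return PageHead(page);
}

And since compound_order() returns 0 on any page that is not a compound
head, (1 << compound_order(page)) is safe on a PageTail: the subpage
check just degenerates to an exact-pfn match there, which is what we want.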

Say "pmdval" as usual, instead of the "pmde" I made up for mm_find_pmd()
before.  Update the comment in mm_find_pmd() to generalise it away from
just the anon_vma lock.

Signed-off-by: Hugh Dickins <hu...@google.com>
---
 include/linux/pageteam.h |    6 +++
 mm/rmap.c                |   65 +++++++++++++++++++++----------------
 2 files changed, 43 insertions(+), 28 deletions(-)

--- a/include/linux/pageteam.h
+++ b/include/linux/pageteam.h
@@ -29,4 +29,10 @@ static inline struct page *team_head(str
        return head;
 }
 
+/* Temporary stub for mm/rmap.c until implemented in mm/huge_memory.c */
+static inline void unmap_team_by_pmd(struct vm_area_struct *vma,
+                       unsigned long addr, pmd_t *pmd, struct page *page)
+{
+}
+
 #endif /* _LINUX_PAGETEAM_H */
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -47,6 +47,7 @@
 
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/pageteam.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/slab.h>
@@ -687,7 +688,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm,
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd = NULL;
-       pmd_t pmde;
+       pmd_t pmdval;
 
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
@@ -700,12 +701,12 @@ pmd_t *mm_find_pmd(struct mm_struct *mm,
        pmd = pmd_offset(pud, address);
        /*
         * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
-        * without holding anon_vma lock for write.  So when looking for a
-        * genuine pmde (in which to find pte), test present and !THP together.
+        * without locking out concurrent rmap lookups.  So when looking for a
+        * pmd entry, in which to find a pte, test present and !THP together.
         */
-       pmde = *pmd;
+       pmdval = *pmd;
        barrier();
-       if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+       if (!pmd_present(pmdval) || pmd_trans_huge(pmdval))
                pmd = NULL;
 out:
        return pmd;
@@ -800,6 +801,7 @@ bool page_check_address_transhuge(struct
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
+       pmd_t pmdval;
        pte_t *pte;
        spinlock_t *ptl;
 
@@ -821,32 +823,24 @@ bool page_check_address_transhuge(struct
        if (!pud_present(*pud))
                return false;
        pmd = pmd_offset(pud, address);
+again:
+       pmdval = *pmd;
+       barrier();
+       if (!pmd_present(pmdval))
+               return false;
 
-       if (pmd_trans_huge(*pmd)) {
+       if (pmd_trans_huge(pmdval)) {
+               if (pmd_page(pmdval) != page)
+                       return false;
                ptl = pmd_lock(mm, pmd);
-               if (!pmd_present(*pmd))
-                       goto unlock_pmd;
-               if (unlikely(!pmd_trans_huge(*pmd))) {
+               if (unlikely(!pmd_same(*pmd, pmdval))) {
                        spin_unlock(ptl);
-                       goto map_pte;
+                       goto again;
                }
-
-               if (pmd_page(*pmd) != page)
-                       goto unlock_pmd;
-
                pte = NULL;
                goto found;
-unlock_pmd:
-               spin_unlock(ptl);
-               return false;
-       } else {
-               pmd_t pmde = *pmd;
-
-               barrier();
-               if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-                       return false;
        }
-map_pte:
+
        pte = pte_offset_map(pmd, address);
        if (!pte_present(*pte)) {
                pte_unmap(pte);
@@ -863,7 +857,7 @@ check_pte:
        }
 
        /* THP can be referenced by any subpage */
-       if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+       if (pte_pfn(*pte) - page_to_pfn(page) >= (1 << compound_order(page))) {
                pte_unmap_unlock(pte, ptl);
                return false;
        }
@@ -1404,6 +1398,7 @@ static int try_to_unmap_one(struct page
                     unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
+       pmd_t *pmd;
        pte_t *pte;
        pte_t pteval;
        spinlock_t *ptl;
@@ -1423,8 +1418,7 @@ static int try_to_unmap_one(struct page
                        goto out;
        }
 
-       pte = page_check_address(page, mm, address, &ptl, 0);
-       if (!pte)
+       if (!page_check_address_transhuge(page, mm, address, &pmd, &pte, &ptl))
                goto out;
 
        /*
@@ -1442,6 +1436,19 @@ static int try_to_unmap_one(struct page
                if (flags & TTU_MUNLOCK)
                        goto out_unmap;
        }
+
+       if (!pte) {
+               if (!(flags & TTU_IGNORE_ACCESS) &&
+                   IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+                   pmdp_clear_flush_young_notify(vma, address, pmd)) {
+                       ret = SWAP_FAIL;
+                       goto out_unmap;
+               }
+               spin_unlock(ptl);
+               unmap_team_by_pmd(vma, address, pmd, page);
+               goto out;
+       }
+
        if (!(flags & TTU_IGNORE_ACCESS)) {
                if (ptep_clear_flush_young_notify(vma, address, pte)) {
                        ret = SWAP_FAIL;
@@ -1542,7 +1549,9 @@ discard:
        put_page(page);
 
 out_unmap:
-       pte_unmap_unlock(pte, ptl);
+       spin_unlock(ptl);
+       if (pte)
+               pte_unmap(pte);
        if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
                mmu_notifier_invalidate_page(mm, address);
 out:
