In collapse_pte_mapped_thp(), we may modify the pte and pmd entry after
acquring the ptl, so convert it to using pte_offset_map_rw_nolock(). At
this time, the pte_same() check is not performed after the PTL held. So we
should get pgt_pmd and do pmd_same() check after the ptl held.

Signed-off-by: Qi Zheng <zhengqi.a...@bytedance.com>
---
 mm/khugepaged.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6498721d4783a..8ab79c13d077f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1605,7 +1605,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, 
unsigned long addr,
        if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
                pml = pmd_lock(mm, pmd);
 
-       start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
+       start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl);
        if (!start_pte)         /* mmap_lock + page lock should prevent this */
                goto abort;
        if (!pml)
@@ -1613,6 +1613,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, 
unsigned long addr,
        else if (ptl != pml)
                spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 
+       if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd))))
+               goto abort;
+
        /* step 2: clear page table and adjust rmap */
        for (i = 0, addr = haddr, pte = start_pte;
             i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
@@ -1645,7 +1648,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, 
unsigned long addr,
                nr_ptes++;
        }
 
-       pte_unmap(start_pte);
        if (!pml)
                spin_unlock(ptl);
 
@@ -1658,13 +1660,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, 
unsigned long addr,
        /* step 4: remove empty page table */
        if (!pml) {
                pml = pmd_lock(mm, pmd);
-               if (ptl != pml)
+               if (ptl != pml) {
                        spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+                       if (unlikely(!pmd_same(pgt_pmd, 
pmdp_get_lockless(pmd)))) {
+                               spin_unlock(pml);
+                               goto abort;
+                       }
+               }
        }
        pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
        pmdp_get_lockless_sync();
        if (ptl != pml)
                spin_unlock(ptl);
+       pte_unmap(start_pte);
        spin_unlock(pml);
 
        mmu_notifier_invalidate_range_end(&range);
-- 
2.20.1


Reply via email to