For both swap and page migration, we use bit 2 of the entry to identify
whether the entry is uffd write-protected.  It plays a role similar to
the existing soft dirty bit in swap entries, but it only keeps the
uffd-wp tracking for a specific PTE/PMD.
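
As an illustration of the swap-entry helpers the generic code below relies
on, here is a minimal sketch of how the arch side (handled by a separate
patch in the series, and assuming an x86-like layout where _PAGE_USER is
bit 2 and carries no meaning in a swap entry) could wire up bit 2; the
exact definitions are not part of this patch:

	/* Illustrative sketch only, assuming an x86-like swap entry layout. */
	#define _PAGE_SWP_UFFD_WP	_PAGE_USER	/* bit 2, unused in swap entries */

	static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
	{
		return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
	}

	static inline int pte_swp_uffd_wp(pte_t pte)
	{
		return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
	}

	static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
	{
		return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
	}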

One special case is that when we recover the uffd-wp bit from a
swap/migration entry back into the PTE, we also need to make sure the
_PAGE_RW bit is cleared; otherwise, even with the _PAGE_UFFD_WP bit set,
the write cannot be trapped at all.
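
Condensed, that recovery path (applied to do_swap_page in the hunk below)
looks like this; a sketch only, mirroring the diff:

	if (pte_swp_uffd_wp(vmf->orig_pte)) {
		pte = pte_mkuffd_wp(pte);	/* re-arm _PAGE_UFFD_WP on the present PTE */
		pte = pte_wrprotect(pte);	/* clear _PAGE_RW so the next write traps */
	}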

Note that this patch removes two lines from "userfaultfd: wp: hook
userfault handler to write protection fault", where we tried to remove
VM_FAULT_WRITE from vmf->flags when uffd-wp is set for the VMA.  This
patch keeps the write flag there instead.

Reviewed-by: Mike Rapoport <r...@linux.vnet.ibm.com>
Signed-off-by: Peter Xu <pet...@redhat.com>
---
 include/linux/swapops.h | 2 ++
 mm/huge_memory.c        | 3 +++
 mm/memory.c             | 6 ++++++
 mm/migrate.c            | 4 ++++
 mm/mprotect.c           | 2 ++
 mm/rmap.c               | 6 ++++++
 6 files changed, 23 insertions(+)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4d961668e5fc..0c2923b1cdb7 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 
        if (pte_swp_soft_dirty(pte))
                pte = pte_swp_clear_soft_dirty(pte);
+       if (pte_swp_uffd_wp(pte))
+               pte = pte_swp_clear_uffd_wp(pte);
        arch_entry = __pte_to_swp_entry(pte);
        return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fb2234cb595a..75de07141801 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2175,6 +2175,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                write = is_write_migration_entry(entry);
                young = false;
                soft_dirty = pmd_swp_soft_dirty(old_pmd);
+               uffd_wp = pmd_swp_uffd_wp(old_pmd);
        } else {
                page = pmd_page(old_pmd);
                if (pmd_dirty(old_pmd))
@@ -2207,6 +2208,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        entry = swp_entry_to_pte(swp_entry);
                        if (soft_dirty)
                                entry = pte_swp_mksoft_dirty(entry);
+                       if (uffd_wp)
+                               entry = pte_swp_mkuffd_wp(entry);
                } else {
                        entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
                        entry = maybe_mkwrite(entry, vma);
diff --git a/mm/memory.c b/mm/memory.c
index 6405d56debee..c3d57fa890f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -736,6 +736,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                                pte = swp_entry_to_pte(entry);
                                if (pte_swp_soft_dirty(*src_pte))
                                        pte = pte_swp_mksoft_dirty(pte);
+                               if (pte_swp_uffd_wp(*src_pte))
+                                       pte = pte_swp_mkuffd_wp(pte);
                                set_pte_at(src_mm, addr, src_pte, pte);
                        }
                } else if (is_device_private_entry(entry)) {
@@ -2825,6 +2827,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        flush_icache_page(vma, page);
        if (pte_swp_soft_dirty(vmf->orig_pte))
                pte = pte_mksoft_dirty(pte);
+       if (pte_swp_uffd_wp(vmf->orig_pte)) {
+               pte = pte_mkuffd_wp(pte);
+               pte = pte_wrprotect(pte);
+       }
        set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
        arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
        vmf->orig_pte = pte;
diff --git a/mm/migrate.c b/mm/migrate.c
index 181f5d2718a9..72cde187d4a1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -241,6 +241,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
                entry = pte_to_swp_entry(*pvmw.pte);
                if (is_write_migration_entry(entry))
                        pte = maybe_mkwrite(pte, vma);
+               else if (pte_swp_uffd_wp(*pvmw.pte))
+                       pte = pte_mkuffd_wp(pte);
 
                if (unlikely(is_zone_device_page(new))) {
                        if (is_device_private_page(new)) {
@@ -2301,6 +2303,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pte))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pte))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, addr, ptep, swp_pte);
 
                        /*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 855dddb07ff2..96c0f521099d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -196,6 +196,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                                newpte = swp_entry_to_pte(entry);
                                if (pte_swp_soft_dirty(oldpte))
                                        newpte = pte_swp_mksoft_dirty(newpte);
+                               if (pte_swp_uffd_wp(oldpte))
+                                       newpte = pte_swp_mkuffd_wp(newpte);
                                set_pte_at(mm, addr, pte, newpte);
 
                                pages++;
diff --git a/mm/rmap.c b/mm/rmap.c
index 0454ecc29537..3750d5a5283c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1469,6 +1469,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pteval))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
                        /*
                         * No need to invalidate here it will synchronize on
@@ -1561,6 +1563,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pteval))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, address, pvmw.pte, swp_pte);
                        /*
                         * No need to invalidate here it will synchronize on
@@ -1627,6 +1631,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pteval))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, address, pvmw.pte, swp_pte);
                        /* Invalidate as we cleared the pte */
                        mmu_notifier_invalidate_range(mm, address,
-- 
2.17.1
