Currently DAX folio/page reference counts are managed differently to
normal pages. To allow these to be managed the same as normal pages
introduce vmf_insert_folio_pmd. This will map the entire PMD-sized folio
and take references as it would for a normally mapped page.

This is distinct from the current mechanism, vmf_insert_pfn_pmd, which
simply inserts a special devmap PMD entry into the page table without
holding a reference to the page for the mapping.

Signed-off-by: Alistair Popple <apop...@nvidia.com>

---

Changes for v5:
 - Minor code cleanup suggested by David
---
 include/linux/huge_mm.h |  1 +-
 mm/huge_memory.c        | 54 ++++++++++++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5bd1ff7..3633bd3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, 
bool write);
 vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, 
bool write);
 
 enum transparent_hugepage_flag {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 256adc3..d1ea76e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1381,14 +1381,12 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
 {
        struct mm_struct *mm = vma->vm_mm;
        pmd_t entry;
-       spinlock_t *ptl;
 
-       ptl = pmd_lock(mm, pmd);
        if (!pmd_none(*pmd)) {
                if (write) {
                        if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
                                WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
-                               goto out_unlock;
+                               return;
                        }
                        entry = pmd_mkyoung(*pmd);
                        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -1396,7 +1394,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
                                update_mmu_cache_pmd(vma, addr, pmd);
                }
 
-               goto out_unlock;
+               return;
        }
 
        entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
@@ -1417,11 +1415,6 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
 
        set_pmd_at(mm, addr, pmd, entry);
        update_mmu_cache_pmd(vma, addr, pmd);
-
-out_unlock:
-       spin_unlock(ptl);
-       if (pgtable)
-               pte_free(mm, pgtable);
 }
 
 /**
@@ -1440,6 +1433,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t 
pfn, bool write)
        struct vm_area_struct *vma = vmf->vma;
        pgprot_t pgprot = vma->vm_page_prot;
        pgtable_t pgtable = NULL;
+       spinlock_t *ptl;
 
        /*
         * If we had pmd_special, we could avoid all these restrictions,
@@ -1462,12 +1456,52 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, 
pfn_t pfn, bool write)
        }
 
        track_pfn_insert(vma, &pgprot, pfn);
-
+       ptl = pmd_lock(vma->vm_mm, vmf->pmd);
        insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+       spin_unlock(ptl);
+       if (pgtable)
+               pte_free(vma->vm_mm, pgtable);
+
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, 
bool write)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       unsigned long addr = vmf->address & PMD_MASK;
+       struct mm_struct *mm = vma->vm_mm;
+       spinlock_t *ptl;
+       pgtable_t pgtable = NULL;
+
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
+       if (WARN_ON_ONCE(folio_order(folio) != PMD_ORDER))
+               return VM_FAULT_SIGBUS;
+
+       if (arch_needs_pgtable_deposit()) {
+               pgtable = pte_alloc_one(vma->vm_mm);
+               if (!pgtable)
+                       return VM_FAULT_OOM;
+       }
+
+       ptl = pmd_lock(mm, vmf->pmd);
+       if (pmd_none(*vmf->pmd)) {
+               folio_get(folio);
+               folio_add_file_rmap_pmd(folio, &folio->page, vma);
+               add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+       }
+       insert_pfn_pmd(vma, addr, vmf->pmd, pfn_to_pfn_t(folio_pfn(folio)),
+                      vma->vm_page_prot, write, pgtable);
+       spin_unlock(ptl);
+       if (pgtable)
+               pte_free(mm, pgtable);
+
+       return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_folio_pmd);
+
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
 {
-- 
git-series 0.9.1

Reply via email to