Currently DAX folio/page reference counts are managed differently from
normal pages. To allow them to be managed the same way as normal pages,
introduce dax_insert_pfn_pmd(). This maps the entire PMD-sized folio
and takes references as it would for a normally mapped page.

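This is distinct from the current mechanism, vmf_insert_pfn_pmd(), which
simply inserts a special devmap PMD entry into the page table without
holding a reference to the page for the mapping.

To let the new function take its references under the same lock that
protects the PMD update, taking the PMD lock and freeing the preallocated
page table are moved out of insert_pfn_pmd() and into its callers.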

Signed-off-by: Alistair Popple <apop...@nvidia.com>
---
 include/linux/huge_mm.h |  1 +-
 mm/huge_memory.c        | 57 ++++++++++++++++++++++++++++++++++--------
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d3a1872..eaf3f78 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -40,6 +40,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t dax_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t dax_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
 
 enum transparent_hugepage_flag {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e8985a4..790041e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1237,14 +1237,12 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
 {
        struct mm_struct *mm = vma->vm_mm;
        pmd_t entry;
-       spinlock_t *ptl;
 
-       ptl = pmd_lock(mm, pmd);
        if (!pmd_none(*pmd)) {
                if (write) {
                        if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
                                WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
-                               goto out_unlock;
+                               return;
                        }
                        entry = pmd_mkyoung(*pmd);
                        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -1252,7 +1250,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
                                update_mmu_cache_pmd(vma, addr, pmd);
                }
 
-               goto out_unlock;
+               return;
        }
 
        entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
@@ -1271,11 +1269,6 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
 
        set_pmd_at(mm, addr, pmd, entry);
        update_mmu_cache_pmd(vma, addr, pmd);
-
-out_unlock:
-       spin_unlock(ptl);
-       if (pgtable)
-               pte_free(mm, pgtable);
 }
 
 /**
@@ -1294,6 +1287,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t 
pfn, bool write)
        struct vm_area_struct *vma = vmf->vma;
        pgprot_t pgprot = vma->vm_page_prot;
        pgtable_t pgtable = NULL;
+       spinlock_t *ptl;
 
        /*
         * If we had pmd_special, we could avoid all these restrictions,
@@ -1316,12 +1310,55 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, 
pfn_t pfn, bool write)
        }
 
        track_pfn_insert(vma, &pgprot, pfn);
-
+       ptl = pmd_lock(vma->vm_mm, vmf->pmd);
        insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+       spin_unlock(ptl);
+       if (pgtable)
+               pte_free(vma->vm_mm, pgtable);
+
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+/**
+ * dax_insert_pfn_pmd - map a PMD-sized folio and take references for it
+ * @vmf: Structure describing the fault
+ * @pfn: pfn of the folio to map
+ * @write: whether it's a write fault
+ *
+ * Unlike vmf_insert_pfn_pmd(), when the PMD is currently empty this takes a
+ * folio reference, adds a PMD file rmap and accounts HPAGE_PMD_NR pages to
+ * the mm counter before the entry is installed.
+ *
+ * Return: VM_FAULT_SIGBUS if the PMD-aligned fault address lies outside the
+ * vma, VM_FAULT_OOM if the page table preallocation fails, otherwise
+ * VM_FAULT_NOPAGE.
+ */
+vm_fault_t dax_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       unsigned long addr = vmf->address & PMD_MASK;
+       struct mm_struct *mm = vma->vm_mm;
+       /*
+        * Work on a copy, as vmf_insert_pfn_pmd() does: track_pfn_insert()
+        * takes a pointer to the protection bits and must not be handed the
+        * vma's own vm_page_prot to modify.
+        */
+       pgprot_t pgprot = vma->vm_page_prot;
+       spinlock_t *ptl;
+       pgtable_t pgtable = NULL;
+       struct folio *folio;
+       struct page *page;
+
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
+       if (arch_needs_pgtable_deposit()) {
+               pgtable = pte_alloc_one(mm);
+               if (!pgtable)
+                       return VM_FAULT_OOM;
+       }
+
+       track_pfn_insert(vma, &pgprot, pfn);
+
+       ptl = pmd_lock(mm, vmf->pmd);
+       /*
+        * Only a brand new mapping takes a reference; the check and the
+        * insertion happen under the same PMD lock.
+        */
+       if (pmd_none(*vmf->pmd)) {
+               page = pfn_t_to_page(pfn);
+               folio = page_folio(page);
+               folio_get(folio);
+               folio_add_file_rmap_pmd(folio, page, vma);
+               add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+       }
+       insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
+       spin_unlock(ptl);
+       /*
+        * NOTE(review): pgtable is passed to insert_pfn_pmd() by value, so
+        * this function cannot tell whether it was consumed there. Confirm
+        * that freeing it here can never release a table still in use.
+        */
+       if (pgtable)
+               pte_free(mm, pgtable);
+
+       return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(dax_insert_pfn_pmd);
+
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
 {
-- 
git-series 0.9.1

Reply via email to