On Thu, Jul 17, 2025 at 01:52:07PM +0200, David Hildenbrand wrote:
> Let's convert to vmf_insert_folio_pmd().
>
> There is a theoretical change in behavior: in the unlikely case there is
> already something mapped, we'll now still call trace_dax_pmd_load_hole()
> and return VM_FAULT_NOPAGE.
>
> Previously, we would have returned VM_FAULT_FALLBACK, and the caller
> would have zapped the PMD to try a PTE fault.
>
> However, that behavior was different from other PTE+PMD faults, when there
> would already be something mapped, and it's not even clear if it could
> be triggered.
>
> Assuming the huge zero folio is already mapped, all good, no need to
> fallback to PTEs.
>
> Assuming there is already a leaf page table ... the behavior would be
> just like when trying to insert a PMD mapping a folio through
> dax_fault_iter()->vmf_insert_folio_pmd().
>
> Assuming there is already something else mapped as PMD? It sounds like
> a BUG, and the behavior would be just like when trying to insert a PMD
> mapping a folio through dax_fault_iter()->vmf_insert_folio_pmd().
>
> So, it sounds reasonable to not handle huge zero folios differently
> from inserting PMDs mapping folios when there already is something mapped.

Sounds reasonable.

>
> Reviewed-by: Alistair Popple <apop...@nvidia.com>
> Signed-off-by: David Hildenbrand <da...@redhat.com>

I've carefully checked through this and am happy that this is right. This is a
really nice cleanup :)

So:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>

> ---
>  fs/dax.c | 47 ++++++++++-------------------------------------
>  1 file changed, 10 insertions(+), 37 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 4229513806bea..ae90706674a3f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1375,51 +1375,24 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state 
> *xas, struct vm_fault *vmf,
>               const struct iomap_iter *iter, void **entry)
>  {
>       struct address_space *mapping = vmf->vma->vm_file->f_mapping;
> -     unsigned long pmd_addr = vmf->address & PMD_MASK;
> -     struct vm_area_struct *vma = vmf->vma;
>       struct inode *inode = mapping->host;
> -     pgtable_t pgtable = NULL;
>       struct folio *zero_folio;
> -     spinlock_t *ptl;
> -     pmd_t pmd_entry;
> -     unsigned long pfn;
> +     vm_fault_t ret;
>
>       zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);
>
> -     if (unlikely(!zero_folio))
> -             goto fallback;
> -
> -     pfn = page_to_pfn(&zero_folio->page);
> -     *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
> -                               DAX_PMD | DAX_ZERO_PAGE);
> -
> -     if (arch_needs_pgtable_deposit()) {
> -             pgtable = pte_alloc_one(vma->vm_mm);
> -             if (!pgtable)
> -                     return VM_FAULT_OOM;
> -     }
> -
> -     ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
> -     if (!pmd_none(*(vmf->pmd))) {
> -             spin_unlock(ptl);
> -             goto fallback;
> +     if (unlikely(!zero_folio)) {
> +             trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, 
> *entry);
> +             return VM_FAULT_FALLBACK;
>       }
>
> -     if (pgtable) {
> -             pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
> -             mm_inc_nr_ptes(vma->vm_mm);
> -     }
> -     pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
> -     set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
> -     spin_unlock(ptl);
> -     trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
> -     return VM_FAULT_NOPAGE;
> +     *entry = dax_insert_entry(xas, vmf, iter, *entry, folio_pfn(zero_folio),
> +                               DAX_PMD | DAX_ZERO_PAGE);
>
> -fallback:
> -     if (pgtable)
> -             pte_free(vma->vm_mm, pgtable);
> -     trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
> -     return VM_FAULT_FALLBACK;
> +     ret = vmf_insert_folio_pmd(vmf, zero_folio, false);
> +     if (ret == VM_FAULT_NOPAGE)
> +             trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
> +     return ret;
>  }
>  #else
>  static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault 
> *vmf,
> --
> 2.50.1
>

Reply via email to