Jérôme Glisse <jgli...@redhat.com> writes:

> This patch adds helpers for device page faults. These helpers fill
> the mirror page table from the CPU page table and keep it synchronized
> with any updates to the CPU page table.
>
> Changed since v1:
>   - Add comment about directory lock.
>
> Changed since v2:
>   - Check for mirror->hmm in hmm_mirror_fault()
>
> Changed since v3:
>   - Adapt to HMM page table changes.
>
> Changed since v4:
>   - Fix PROT_NONE, ie do not populate from protnone pte.
>   - Fix huge pmd handling (start address may != pmd start address)
>   - Fix missing entry case.
>
> Signed-off-by: Jérôme Glisse <jgli...@redhat.com>
> Signed-off-by: Sherry Cheung <sche...@nvidia.com>
> Signed-off-by: Subhash Gutti <sgu...@nvidia.com>
> Signed-off-by: Mark Hairgrove <mhairgr...@nvidia.com>
> Signed-off-by: John Hubbard <jhubb...@nvidia.com>
> Signed-off-by: Jatin Kumar <jaku...@nvidia.com>
> ---


[...]

> +static int hmm_mirror_fault_hpmd(struct hmm_mirror *mirror,
> +                              struct hmm_event *event,
> +                              struct vm_area_struct *vma,
> +                              struct hmm_pt_iter *iter,
> +                              pmd_t *pmdp,
> +                              struct hmm_mirror_fault *mirror_fault,
> +                              unsigned long start,
> +                              unsigned long end)
> +{
> +     struct page *page;
> +     unsigned long addr, pfn;
> +     unsigned flags = FOLL_TOUCH;
> +     spinlock_t *ptl;
> +     int ret;
> +
> +     ptl = pmd_lock(mirror->hmm->mm, pmdp);
> +     if (unlikely(!pmd_trans_huge(*pmdp))) {
> +             spin_unlock(ptl);
> +             return -EAGAIN;
> +     }
> +     flags |= event->etype == HMM_DEVICE_WFAULT ? FOLL_WRITE : 0;
> +     page = follow_trans_huge_pmd(vma, start, pmdp, flags);
> +     pfn = page_to_pfn(page);
> +     spin_unlock(ptl);
> +
> +     /* Just fault in the whole PMD. */
> +     start &= PMD_MASK;
> +     end = start + PMD_SIZE - 1;
> +
> +     if (!pmd_write(*pmdp) && event->etype == HMM_DEVICE_WFAULT)
> +                     return -ENOENT;
> +
> +     for (ret = 0, addr = start; !ret && addr < end;) {
> +             unsigned long i, next = end;
> +             dma_addr_t *hmm_pte;
> +
> +             hmm_pte = hmm_pt_iter_populate(iter, addr, &next);
> +             if (!hmm_pte)
> +                     return -ENOMEM;
> +
> +             i = hmm_pt_index(&mirror->pt, addr, mirror->pt.llevel);
> +
> +             /*
> +              * The directory lock protect against concurrent clearing of
> +              * page table bit flags. Exceptions being the dirty bit and
> +              * the device driver private flags.
> +              */
> +             hmm_pt_iter_directory_lock(iter);
> +             do {
> +                     if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
> +                             hmm_pte[i] = hmm_pte_from_pfn(pfn);
> +                             hmm_pt_iter_directory_ref(iter);

I looked at that, and it is actually:
static inline void hmm_pt_iter_directory_ref(struct hmm_pt_iter *iter)
{
        BUG_ON(!iter->ptd[iter->pt->llevel - 1]);
        hmm_pt_directory_ref(iter->pt, iter->ptd[iter->pt->llevel - 1]);
}

static inline void hmm_pt_directory_ref(struct hmm_pt *pt,
                                        struct page *ptd)
{
        if (!atomic_inc_not_zero(&ptd->_mapcount))
                /* Illegal this should not happen. */
                BUG();
}

What is the _mapcount update about?
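
If I read it right, each valid entry takes a reference on its last-level
directory page, with page->_mapcount doubling as the refcount, so that the
directory can be torn down once it holds no valid entries. Something like the
sketch below is what I would expect on the release side (the
hmm_pt_directory_unref name and the free path are my guesses, not copied from
the series):

static inline void hmm_pt_directory_unref(struct hmm_pt *pt,
                                          struct page *ptd)
{
        /* Guessed counterpart to hmm_pt_directory_ref(): drop the
         * per-entry reference and free the directory page once no
         * valid entry references it anymore. */
        if (atomic_dec_and_test(&ptd->_mapcount))
                __free_page(ptd);
}

If that is the intent, a comment next to hmm_pt_directory_ref() spelling it
out would help.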

> +                     }
> +                     BUG_ON(hmm_pte_pfn(hmm_pte[i]) != pfn);
> +                     if (pmd_write(*pmdp))
> +                             hmm_pte_set_write(&hmm_pte[i]);
> +             } while (addr += PAGE_SIZE, pfn++, i++, addr != next);
> +             hmm_pt_iter_directory_unlock(iter);
> +             mirror_fault->addr = addr;
> +     }
> +

So we don't have huge page mappings in the HMM page table?
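
To make the question concrete, my reading of the loop above (illustration
only, reusing names from the quoted patch) is that a single huge pmd gets
expanded into PMD_SIZE / PAGE_SIZE individual 4K entries at the last level,
roughly:

        /* One 4K mirror entry per subpage of the huge pmd. */
        for (addr = start & PMD_MASK, i = hmm_pt_index(&mirror->pt, addr,
                                                       mirror->pt.llevel);
             addr < (start & PMD_MASK) + PMD_SIZE;
             addr += PAGE_SIZE, pfn++, i++)
                hmm_pte[i] = hmm_pte_from_pfn(pfn);

rather than a single huge entry in the mirror table.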


> +     return 0;
> +}
> +
> +static int hmm_pte_hole(unsigned long addr,
> +                     unsigned long next,
> +                     struct mm_walk *walk)
> +{
> +     return -ENOENT;
> +}
> +
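
For context, a .pte_hole callback like this is normally wired into the
page-walk API of that kernel generation along the lines below; the pmd_entry
handler name and the private pointer are my guesses for illustration, not
copied from the patch:

        struct mm_walk walk = {
                .pmd_entry = hmm_mirror_fault_pmd,      /* guessed name */
                .pte_hole  = hmm_pte_hole,
                .mm        = vma->vm_mm,
                .private   = &mirror_fault,
        };

        /*
         * A non-zero return from .pte_hole (here -ENOENT) stops the walk
         * and is propagated back to the caller, which can then report the
         * unmapped range to the device driver.
         */
        ret = walk_page_range(start, end, &walk);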


-aneesh
