On Thu, Aug 23, 2018 at 01:59:17PM -0700, Mike Kravetz wrote:
> When fixing an issue with PMD sharing and migration, it was discovered
> via code inspection that other callers of huge_pmd_unshare potentially
> have an issue with cache and tlb flushing.
> 
> Use the routine adjust_range_if_pmd_sharing_possible() to calculate
> worst case ranges for mmu notifiers.  Ensure that this range is flushed
> if huge_pmd_unshare succeeds and unmaps a PUD_SUZE area.

s/PUD_SUZE/PUD_SIZE/
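
Also, it might help to spell out what "worst case" means here: shared
PMD pages always map a full PUD_SIZE region, so the notifier/flush
range has to be widened to PUD_SIZE aligned boundaries.  A simplified
sketch of that adjustment (illustrative only, not the exact
adjust_range_if_pmd_sharing_possible() code; the helper name below is
made up for the example):

	/*
	 * If this vma could be using shared PMD pages, widen
	 * [*start, *end) to PUD_SIZE aligned boundaries, since
	 * clearing a shared PMD page unmaps a whole PUD_SIZE area.
	 * Only widen when the aligned range still lies within the
	 * vma, as sharing is only possible for such ranges.
	 */
	static void sketch_adjust_range(struct vm_area_struct *vma,
					unsigned long *start,
					unsigned long *end)
	{
		unsigned long a_start = *start & PUD_MASK;
		unsigned long a_end = ALIGN(*end, PUD_SIZE);

		if (!(vma->vm_flags & VM_MAYSHARE))
			return;

		if (a_start >= vma->vm_start && a_end <= vma->vm_end) {
			*start = a_start;
			*end = a_end;
		}
	}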

> 
> Signed-off-by: Mike Kravetz <mike.krav...@oracle.com>

Looks good to me.

Reviewed-by: Naoya Horiguchi <n-horigu...@ah.jp.nec.com>

> ---
>  mm/hugetlb.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 44 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index a73c5728e961..082cddf46b4f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3333,8 +3333,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>       struct page *page;
>       struct hstate *h = hstate_vma(vma);
>       unsigned long sz = huge_page_size(h);
> -     const unsigned long mmun_start = start; /* For mmu_notifiers */
> -     const unsigned long mmun_end   = end;   /* For mmu_notifiers */
> +     unsigned long mmun_start = start;       /* For mmu_notifiers */
> +     unsigned long mmun_end   = end;         /* For mmu_notifiers */
>  
>       WARN_ON(!is_vm_hugetlb_page(vma));
>       BUG_ON(start & ~huge_page_mask(h));
> @@ -3346,6 +3346,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>        */
>       tlb_remove_check_page_size_change(tlb, sz);
>       tlb_start_vma(tlb, vma);
> +
> +     /*
> +      * If sharing possible, alert mmu notifiers of worst case.
> +      */
> +     adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
>       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
>       address = start;
>       for (; address < end; address += sz) {
> @@ -3356,6 +3361,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>               ptl = huge_pte_lock(h, mm, ptep);
>               if (huge_pmd_unshare(mm, &address, ptep)) {
>                       spin_unlock(ptl);
> +                     /*
> +                      * We just unmapped a page of PMDs by clearing a PUD.
> +                      * The caller's TLB flush range should cover this area.
> +                      */
>                       continue;
>               }
>  
> @@ -3438,12 +3447,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
>  {
>       struct mm_struct *mm;
>       struct mmu_gather tlb;
> +     unsigned long tlb_start = start;
> +     unsigned long tlb_end = end;
> +
> +     /*
> +      * If shared PMDs were possibly used within this vma range, adjust
> +      * start/end for worst case tlb flushing.
> +      * Note that we can not be sure if PMDs are shared until we try to
> +      * unmap pages.  However, we want to make sure TLB flushing covers
> +      * the largest possible range.
> +      */
> +     adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
>  
>       mm = vma->vm_mm;
>  
> -     tlb_gather_mmu(&tlb, mm, start, end);
> +     tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
>       __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
> -     tlb_finish_mmu(&tlb, start, end);
> +     tlb_finish_mmu(&tlb, tlb_start, tlb_end);
>  }
>  
>  /*
> @@ -4309,11 +4329,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>       pte_t pte;
>       struct hstate *h = hstate_vma(vma);
>       unsigned long pages = 0;
> +     unsigned long f_start = start;
> +     unsigned long f_end = end;
> +     bool shared_pmd = false;
> +
> +     /*
> +      * In the case of shared PMDs, the area to flush could be beyond
> +      * start/end.  Set f_start/f_end to cover the maximum possible
> +      * range if PMD sharing is possible.
> +      */
> +     adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
>  
>       BUG_ON(address >= end);
> -     flush_cache_range(vma, address, end);
> +     flush_cache_range(vma, f_start, f_end);
>  
> -     mmu_notifier_invalidate_range_start(mm, start, end);
> +     mmu_notifier_invalidate_range_start(mm, f_start, f_end);
>       i_mmap_lock_write(vma->vm_file->f_mapping);
>       for (; address < end; address += huge_page_size(h)) {
>               spinlock_t *ptl;
> @@ -4324,6 +4354,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>               if (huge_pmd_unshare(mm, &address, ptep)) {
>                       pages++;
>                       spin_unlock(ptl);
> +                     shared_pmd = true;
>                       continue;
>               }
>               pte = huge_ptep_get(ptep);
> @@ -4359,9 +4390,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>        * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
>        * may have cleared our pud entry and done put_page on the page table:
>        * once we release i_mmap_rwsem, another task can do the final put_page
> -      * and that page table be reused and filled with junk.
> +      * and that page table be reused and filled with junk.  If we actually
> +      * did unshare a page of pmds, flush the range corresponding to the pud.
>        */
> -     flush_hugetlb_tlb_range(vma, start, end);
> +     if (shared_pmd)
> +             flush_hugetlb_tlb_range(vma, f_start, f_end);
> +     else
> +             flush_hugetlb_tlb_range(vma, start, end);
>       /*
>        * No need to call mmu_notifier_invalidate_range() we are downgrading
>        * page table protection not changing it to point to a new page.
> @@ -4369,7 +4404,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>        * See Documentation/vm/mmu_notifier.rst
>        */
>       i_mmap_unlock_write(vma->vm_file->f_mapping);
> -     mmu_notifier_invalidate_range_end(mm, start, end);
> +     mmu_notifier_invalidate_range_end(mm, f_start, f_end);
>  
>       return pages << h->order;
>  }
> -- 
> 2.17.1
> 
> 
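
One more note, just to make the widened flush range concrete (using
x86_64 numbers for illustration: 2MB hugetlb pages, PUD_SIZE = 1GB):
if an unmap of [0x40200000, 0x40400000) happens to hit a shared PMD
page, huge_pmd_unshare() clears the PUD entry and effectively unmaps
the whole [0x40000000, 0x80000000) region for this mm.  Flushing only
the original 2MB would leave stale TLB entries for the rest of that
1GB, which is exactly what the widened tlb_start/tlb_end and
f_start/f_end ranges are there to cover.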
