From: Davidlohr Bueso <d...@stgolabs.net> The i_mmap_rwsem protects shared pages against races when doing the sharing and unsharing, ultimately calling huge_pmd_share/unshare() for PMD pages -- the lock is also needed to avoid races when populating the pud for pmd allocation when looking for a shareable pmd page for hugetlb. Ultimately the interval tree remains intact in all of these paths, so the lock is taken in read (shared) mode instead of write mode.
Signed-off-by: Davidlohr Bueso <dbu...@suse.de> --- Resending this patch due to stupid email quota rules, *sigh* fs/hugetlbfs/inode.c | 4 ++-- mm/hugetlb.c | 12 ++++++------ mm/memory.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 5eba47f..0dca54d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); - i_mmap_lock_write(mapping); + i_mmap_lock_read(mapping); if (!RB_EMPTY_ROOT(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); - i_mmap_unlock_write(mapping); + i_mmap_unlock_read(mapping); truncate_hugepages(inode, offset); return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7eeab54..f68dd21 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2772,7 +2772,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * this mapping should be shared between all the VMAs, * __unmap_hugepage_range() is called as the lock is already held */ - i_mmap_lock_write(mapping); + i_mmap_lock_read(mapping); vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) { /* Do not unmap the current VMA */ if (iter_vma == vma) @@ -2789,7 +2789,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, unmap_hugepage_range(iter_vma, address, address + huge_page_size(h), page); } - i_mmap_unlock_write(mapping); + i_mmap_unlock_read(mapping); } /* @@ -3346,7 +3346,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, flush_cache_range(vma, address, end); mmu_notifier_invalidate_range_start(mm, start, end); - i_mmap_lock_write(vma->vm_file->f_mapping); + i_mmap_lock_read(vma->vm_file->f_mapping); for (; address < end; address += huge_page_size(h)) { spinlock_t *ptl; ptep = huge_pte_offset(mm, address); @@ -3374,7 +3374,7 @@ unsigned long 
hugetlb_change_protection(struct vm_area_struct *vma, * and that page table be reused and filled with junk. */ flush_tlb_range(vma, start, end); - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); mmu_notifier_invalidate_range_end(mm, start, end); return pages << h->order; @@ -3542,7 +3542,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!vma_shareable(vma, addr)) return (pte_t *)pmd_alloc(mm, pud, addr); - i_mmap_lock_write(mapping); + i_mmap_lock_read(mapping); vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; @@ -3570,7 +3570,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) spin_unlock(ptl); out: pte = (pte_t *)pmd_alloc(mm, pud, addr); - i_mmap_unlock_write(mapping); + i_mmap_unlock_read(mapping); return pte; } diff --git a/mm/memory.c b/mm/memory.c index d16c662..b1931c1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1339,9 +1339,9 @@ static void unmap_single_vma(struct mmu_gather *tlb, * safe to do nothing in this case. */ if (vma->vm_file) { - i_mmap_lock_write(vma->vm_file->f_mapping); + i_mmap_lock_read(vma->vm_file->f_mapping); __unmap_hugepage_range_final(tlb, vma, start, end, NULL); - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } } else unmap_page_range(tlb, vma, start, end, details); -- 1.8.4.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/