Areas hole-punched by fallocate will not have entries in the
region/reserve map.  However, shared mappings with min_size subpool
reservations may still have reserved pages.  alloc_huge_page needs
to handle this special case and do the proper accounting.

Signed-off-by: Mike Kravetz <mike.krav...@oracle.com>
---
 mm/hugetlb.c | 48 +++++++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ecbaffe..9c295c9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -692,19 +692,9 @@ static int vma_has_reserves(struct vm_area_struct *vma, 
long chg)
                        return 0;
        }
 
-       if (vma->vm_flags & VM_MAYSHARE) {
-               /*
-                * We know VM_NORESERVE is not set.  Therefore, there SHOULD
-                * be a region map for all pages.  The only situation where
-                * there is no region map is if a hole was punched via
-                * fallocate.  In this case, there really are no reverves to
-                * use.  This situation is indicated if chg != 0.
-                */
-               if (chg)
-                       return 0;
-               else
-                       return 1;
-       }
+       /* Shared mappings always use reserves */
+       if (vma->vm_flags & VM_MAYSHARE)
+               return 1;
 
        /*
         * Only the process that called mmap() has reserves for
@@ -1601,6 +1591,7 @@ static struct page *alloc_huge_page(struct vm_area_struct 
*vma,
        struct hstate *h = hstate_vma(vma);
        struct page *page;
        long chg, commit;
+       long gbl_chg;
        int ret, idx;
        struct hugetlb_cgroup *h_cg;
 
@@ -1608,24 +1599,39 @@ static struct page *alloc_huge_page(struct 
vm_area_struct *vma,
        /*
         * Processes that did not create the mapping will have no
         * reserves and will not have accounted against subpool
-        * limit. Check that the subpool limit can be made before
-        * satisfying the allocation MAP_NORESERVE mappings may also
-        * need pages and subpool limit allocated allocated if no reserve
-        * mapping overlaps.
+        * limit. Check that the subpool limit will not be exceeded
+        * before performing the allocation.  Allocations for
+        * MAP_NORESERVE mappings also need to be checked against
+        * any subpool limit.
+        *
+        * NOTE: Shared mappings with holes punched via fallocate
+        * may still have reservations, even without entries in the
+        * reserve map as indicated by vma_needs_reservation.  This
+        * would be the case if hugepage_subpool_get_pages returns
+        * zero to indicate no changes to the global reservation count
+        * are necessary.  In this case, pass the output of
+        * hugepage_subpool_get_pages (zero) to dequeue_huge_page_vma
+        * so that the page is not counted against the global limit.
+        * For MAP_NORESERVE mappings always pass the output of
+        * vma_needs_reservation.  For race detection and error cleanup
+        * use output of vma_needs_reservation as well.
         */
-       chg = vma_needs_reservation(h, vma, addr);
+       chg = gbl_chg = vma_needs_reservation(h, vma, addr);
        if (chg < 0)
                return ERR_PTR(-ENOMEM);
-       if (chg || avoid_reserve)
-               if (hugepage_subpool_get_pages(spool, 1) < 0)
+       if (chg || avoid_reserve) {
+               gbl_chg = hugepage_subpool_get_pages(spool, 1);
+               if (gbl_chg < 0)
                        return ERR_PTR(-ENOSPC);
+       }
 
        ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
        if (ret)
                goto out_subpool_put;
 
        spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
+       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve,
+                                       avoid_reserve ? chg : gbl_chg);
        if (!page) {
                spin_unlock(&hugetlb_lock);
                page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to