2/2

--
SUSE Labs, Novell Inc.

Remove PageReserved() calls from core code by tightening VM_RESERVED
handling in mm/ to cover PageReserved functionality.
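
To make the new convention concrete, here is a hedged sketch (not part of
the patch) of the driver side: a region whose pages aren't managed by the
pagecache or by rmap gets marked VM_RESERVED in the driver's mmap handler.
The device and the mydev_phys/mydev_mmap names are made up for
illustration; remap_pfn_range() is assumed to set VM_IO | VM_RESERVED
itself, so the explicit assignment below is only for emphasis.

	/* Illustrative only: export a physically contiguous device buffer. */
	static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long size = vma->vm_end - vma->vm_start;

		/* Pages in this range are not pagecache/rmap managed. */
		vma->vm_flags |= VM_IO | VM_RESERVED;

		if (remap_pfn_range(vma, vma->vm_start,
				    mydev_phys >> PAGE_SHIFT, size,
				    vma->vm_page_prot))
			return -EAGAIN;
		return 0;
	}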

PageReserved special casing is removed from get_page and put_page.

All setting and clearing of PageReserved is retained, and it is now
flagged in the page_alloc checks to help ensure we don't introduce
any refcount-based freeing of reserved pages.
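
As an illustration of that invariant, here is a hedged sketch (mirroring
what arch free_initmem() code already does, not something added by this
patch): the owner of a reserved page frees it by clearing PG_reserved and
re-initialising the refcount first; simply letting a refcount fall to zero
on a PG_reserved page will now trip the page_alloc checks.

	/* 'addr' is a kernel virtual address whose page the caller owns. */
	struct page *page = virt_to_page(addr);

	ClearPageReserved(page);
	set_page_count(page, 1);	/* memmap_init_zone() now starts reserved pages at count 1 */
	free_page(addr);		/* count drops to zero and the page is freed normally */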

MAP_PRIVATE, PROT_WRITE mappings of VM_RESERVED regions are tentatively
being deprecated. We never handled them completely correctly anyway, and
they are difficult to handle nicely - difficult but not impossible, so
support could be reintroduced in future if required (Hugh has a proof of
concept).
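
For illustration, this is what the deprecation looks like from user space
(a hedged sketch; /dev/fb0 merely stands in for any file whose mmap
handler marks the vma VM_RESERVED, and EACCES is the error returned by the
mm/mmap.c and mm/mprotect.c hunks below):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/fb0", O_RDWR);
		void *p;

		if (fd < 0)
			return 1;

		/* A shared writable mapping keeps working as before. */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

		/* The deprecated case: expect MAP_FAILED with errno == EACCES,
		 * plus a KERN_WARNING in the kernel log. */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED)
			perror("MAP_PRIVATE, PROT_WRITE of VM_RESERVED region");

		close(fd);
		return 0;
	}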

Once the PageReserved() calls are removed from kernel/power/swsusp.c and
from all arch/ and driver code, the SetPageReserved and ClearPageReserved
calls and the PG_reserved bit itself can be trivially removed.

The last real user of PageReserved is swsusp, which uses it to determine
whether a struct page points to valid memory. This still needs to be
addressed.

Many thanks to Hugh Dickins for input.

Signed-off-by: Nick Piggin <[EMAIL PROTECTED]>


Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -156,7 +156,8 @@ extern unsigned int kobjsize(const void 
 
 #define VM_DONTCOPY    0x00020000      /* Do not copy this vma on fork */
 #define VM_DONTEXPAND  0x00040000      /* Cannot expand with mremap() */
-#define VM_RESERVED    0x00080000      /* Don't unmap it from swap_out */
+#define VM_RESERVED    0x00080000      /* Pages and ptes in region aren't managed with regular pagecache or rmap routines */
+
 #define VM_ACCOUNT     0x00100000      /* Is a VM accounted object */
 #define VM_HUGETLB     0x00400000      /* Huge TLB Page VM */
 #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
@@ -337,7 +338,7 @@ static inline void get_page(struct page 
 
 static inline void put_page(struct page *page)
 {
-       if (!PageReserved(page) && put_page_testzero(page))
+       if (put_page_testzero(page))
                __page_cache_release(page);
 }
 
@@ -723,6 +724,9 @@ void install_arg_page(struct vm_area_str
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
                int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+#define invalid_pfn(pte, vm_flags, vaddr)              \
+               __invalid_pfn(__FUNCTION__, pte, vm_flags, vaddr)
+void __invalid_pfn(const char *, pte_t, unsigned long, unsigned long);
 
 int __set_page_dirty_buffers(struct page *page);
 int __set_page_dirty_nobuffers(struct page *page);
Index: linux-2.6/mm/madvise.c
===================================================================
--- linux-2.6.orig/mm/madvise.c
+++ linux-2.6/mm/madvise.c
@@ -123,7 +123,7 @@ static long madvise_dontneed(struct vm_a
                             unsigned long start, unsigned long end)
 {
        *prev = vma;
-       if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
+       if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) || is_vm_hugetlb_page(vma))
                return -EINVAL;
 
        if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -333,6 +333,21 @@ out:
 }
 
 /*
+ * This function is called to print an error when a pte in a
+ * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * is an error).
+ *
+ * The calling function must still handle the error.
+ */
+void __invalid_pfn(const char *errfunc, pte_t pte,
+                               unsigned long vm_flags, unsigned long vaddr)
+{
+       printk(KERN_ERR "%s: pte does not point to valid memory. "
+               "process = %s, pte = %08lx, vm_flags = %lx, vaddr = %lx\n",
+               errfunc, current->comm, (long)pte_val(pte), vm_flags, vaddr);
+}
+
+/*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
@@ -361,25 +376,29 @@ copy_one_pte(struct mm_struct *dst_mm, s
                                spin_unlock(&mmlist_lock);
                        }
                }
-               set_pte_at(dst_mm, addr, dst_pte, pte);
-               return;
+               goto out_set_pte;
        }
 
+       /* If the region is VM_RESERVED, the mapping is not
+        * mapped via rmap - duplicate the pte as is.
+        */
+       if (vm_flags & VM_RESERVED)
+               goto out_set_pte;
+
+       /* If the pte points outside of valid memory but
+        * the region is not VM_RESERVED, we have a problem.
+        */
        pfn = pte_pfn(pte);
-       /* the pte points outside of valid memory, the
-        * mapping is assumed to be good, meaningful
-        * and not mapped via rmap - duplicate the
-        * mapping as is.
-        */
-       page = NULL;
-       if (pfn_valid(pfn))
-               page = pfn_to_page(pfn);
-
-       if (!page || PageReserved(page)) {
-               set_pte_at(dst_mm, addr, dst_pte, pte);
-               return;
+       if (unlikely(!pfn_valid(pfn))) {
+               invalid_pfn(pte, vm_flags, addr);
+               goto out_set_pte; /* try to do something sane */
        }
 
+       page = pfn_to_page(pfn);
+       /* Mappings to zero pages aren't covered by rmap either. */
+       if (page == ZERO_PAGE(addr))
+               goto out_set_pte;
+
        /*
         * If it's a COW mapping, write protect it both
         * in the parent and the child
@@ -400,8 +419,9 @@ copy_one_pte(struct mm_struct *dst_mm, s
        inc_mm_counter(dst_mm, rss);
        if (PageAnon(page))
                inc_mm_counter(dst_mm, anon_rss);
-       set_pte_at(dst_mm, addr, dst_pte, pte);
        page_dup_rmap(page);
+out_set_pte:
+       set_pte_at(dst_mm, addr, dst_pte, pte);
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -514,7 +534,8 @@ int copy_page_range(struct mm_struct *ds
        return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static void zap_pte_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
                                struct zap_details *details)
 {
@@ -528,10 +549,14 @@ static void zap_pte_range(struct mmu_gat
                if (pte_present(ptent)) {
                        struct page *page = NULL;
                        unsigned long pfn = pte_pfn(ptent);
-                       if (pfn_valid(pfn)) {
-                               page = pfn_to_page(pfn);
-                               if (PageReserved(page))
-                                       page = NULL;
+                       if (!(vma->vm_flags & VM_RESERVED)) {
+                               if (unlikely(!pfn_valid(pfn))) {
+                                       invalid_pfn(ptent, vma->vm_flags, addr);
+                               } else {
+                                       page = pfn_to_page(pfn);
+                                       if (page == ZERO_PAGE(addr))
+                                               page = NULL;
+                               }
                        }
                        if (unlikely(details) && page) {
                                /*
@@ -584,7 +609,8 @@ static void zap_pte_range(struct mmu_gat
        pte_unmap(pte - 1);
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline void zap_pmd_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pud_t *pud,
                                unsigned long addr, unsigned long end,
                                struct zap_details *details)
 {
@@ -596,11 +622,12 @@ static inline void zap_pmd_range(struct 
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd))
                        continue;
-               zap_pte_range(tlb, pmd, addr, next, details);
+               zap_pte_range(tlb, vma, pmd, addr, next, details);
        } while (pmd++, addr = next, addr != end);
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void zap_pud_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pgd_t *pgd,
                                unsigned long addr, unsigned long end,
                                struct zap_details *details)
 {
@@ -612,7 +639,7 @@ static inline void zap_pud_range(struct 
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
-               zap_pmd_range(tlb, pud, addr, next, details);
+               zap_pmd_range(tlb, vma, pud, addr, next, details);
        } while (pud++, addr = next, addr != end);
 }
 
@@ -633,7 +660,7 @@ static void unmap_page_range(struct mmu_
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
-               zap_pud_range(tlb, pgd, addr, next, details);
+               zap_pud_range(tlb, vma, pgd, addr, next, details);
        } while (pgd++, addr = next, addr != end);
        tlb_end_vma(tlb, vma);
 }
@@ -933,7 +960,7 @@ int get_user_pages(struct task_struct *t
                        continue;
                }
 
-               if (!vma || (vma->vm_flags & VM_IO)
+               if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
                                || !(flags & vma->vm_flags))
                        return i ? : -EFAULT;
 
@@ -993,8 +1020,7 @@ int get_user_pages(struct task_struct *t
                        if (pages) {
                                pages[i] = page;
                                flush_dcache_page(page);
-                               if (!PageReserved(page))
-                                       page_cache_get(page);
+                               page_cache_get(page);
                        }
                        if (vmas)
                                vmas[i] = vma;
@@ -1098,8 +1124,7 @@ static int remap_pte_range(struct mm_str
                return -ENOMEM;
        do {
                BUG_ON(!pte_none(*pte));
-               if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-                       set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+               set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap(pte - 1);
@@ -1239,6 +1264,8 @@ static int do_wp_page(struct mm_struct *
        pte_t entry;
        int ret;
 
+       BUG_ON(vma->vm_flags & VM_RESERVED);
+
        if (unlikely(!pfn_valid(pfn))) {
                /*
                 * This should really halt the system so it can be debugged or
@@ -1246,9 +1273,8 @@ static int do_wp_page(struct mm_struct *
                 * data, but for the moment just pretend this is OOM.
                 */
                pte_unmap(page_table);
-               printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
-                               address);
                spin_unlock(&mm->page_table_lock);
+               invalid_pfn(pte, vma->vm_flags, address);
                return VM_FAULT_OOM;
        }
        old_page = pfn_to_page(pfn);
@@ -1273,13 +1299,16 @@ static int do_wp_page(struct mm_struct *
        /*
         * Ok, we need to copy. Oh, well..
         */
-       if (!PageReserved(old_page))
+       if (old_page == ZERO_PAGE(address))
+               old_page = NULL;
+       else
                page_cache_get(old_page);
+
        spin_unlock(&mm->page_table_lock);
 
        if (unlikely(anon_vma_prepare(vma)))
                goto no_new_page;
-       if (old_page == ZERO_PAGE(address)) {
+       if (old_page == NULL) {
                new_page = alloc_zeroed_user_highpage(vma, address);
                if (!new_page)
                        goto no_new_page;
@@ -1296,12 +1325,13 @@ static int do_wp_page(struct mm_struct *
        spin_lock(&mm->page_table_lock);
        page_table = pte_offset_map(pmd, address);
        if (likely(pte_same(*page_table, pte))) {
-               if (PageAnon(old_page))
-                       dec_mm_counter(mm, anon_rss);
-               if (PageReserved(old_page))
+               if (old_page == NULL)
                        inc_mm_counter(mm, rss);
-               else
+               else {
                        page_remove_rmap(old_page);
+                       if (PageAnon(old_page))
+                               dec_mm_counter(mm, anon_rss);
+               }
                flush_cache_page(vma, address, pfn);
                break_cow(vma, new_page, address, page_table);
                lru_cache_add_active(new_page);
@@ -1312,13 +1342,16 @@ static int do_wp_page(struct mm_struct *
                ret |= VM_FAULT_WRITE;
        }
        pte_unmap(page_table);
-       page_cache_release(new_page);
-       page_cache_release(old_page);
+       if (old_page) {
+               page_cache_release(new_page);
+               page_cache_release(old_page);
+       }
        spin_unlock(&mm->page_table_lock);
        return ret;
 
 no_new_page:
-       page_cache_release(old_page);
+       if (old_page)
+               page_cache_release(old_page);
        return VM_FAULT_OOM;
 }
 
@@ -1755,7 +1788,7 @@ do_anonymous_page(struct mm_struct *mm, 
        struct page * page = ZERO_PAGE(addr);
 
        /* Read-only mapping of ZERO_PAGE. */
-       entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+       entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
 
        /* ..except if it's a write access */
        if (write_access) {
@@ -1894,9 +1927,6 @@ retry:
         */
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
-               if (!PageReserved(new_page))
-                       inc_mm_counter(mm, rss);
-
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
@@ -1905,8 +1935,10 @@ retry:
                if (anon) {
                        lru_cache_add_active(new_page);
                        page_add_anon_rmap(new_page, vma, address);
-               } else
+               } else if (!(vma->vm_flags & VM_RESERVED)) {
                        page_add_file_rmap(new_page);
+                       inc_mm_counter(mm, rss);
+               }
                pte_unmap(page_table);
        } else {
                /* One of our sibling threads was faster, back out. */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -113,7 +113,8 @@ static void bad_page(const char *functio
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback);
+                       1 << PG_writeback |
+                       1 << PG_reserved );
        set_page_count(page, 0);
        reset_page_mapcount(page);
        page->mapping = NULL;
@@ -243,7 +244,6 @@ static inline int page_is_buddy(struct p
 {
        if (PagePrivate(page)           &&
            (page_order(page) == order) &&
-           !PageReserved(page)         &&
             page_count(page) == 0)
                return 1;
        return 0;
@@ -326,7 +326,8 @@ static inline void free_pages_check(cons
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback )))
+                       1 << PG_writeback |
+                       1 << PG_reserved )))
                bad_page(function, page);
        if (PageDirty(page))
                __ClearPageDirty(page);
@@ -454,7 +455,8 @@ static void prep_new_page(struct page *p
                        1 << PG_reclaim |
                        1 << PG_slab    |
                        1 << PG_swapcache |
-                       1 << PG_writeback )))
+                       1 << PG_writeback |
+                       1 << PG_reserved )))
                bad_page(__FUNCTION__, page);
 
        page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
@@ -1011,7 +1013,7 @@ void __pagevec_free(struct pagevec *pvec
 
 fastcall void __free_pages(struct page *page, unsigned int order)
 {
-       if (!PageReserved(page) && put_page_testzero(page)) {
+       if (put_page_testzero(page)) {
                if (order == 0)
                        free_hot_page(page);
                else
@@ -1653,7 +1655,7 @@ void __init memmap_init_zone(unsigned lo
                        continue;
                page = pfn_to_page(pfn);
                set_page_links(page, zone, nid, pfn);
-               set_page_count(page, 0);
+               set_page_count(page, 1);
                reset_page_mapcount(page);
                SetPageReserved(page);
                INIT_LIST_HEAD(&page->lru);
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -48,7 +48,7 @@ void put_page(struct page *page)
                }
                return;
        }
-       if (!PageReserved(page) && put_page_testzero(page))
+       if (put_page_testzero(page))
                __page_cache_release(page);
 }
 EXPORT_SYMBOL(put_page);
@@ -215,7 +215,7 @@ void release_pages(struct page **pages, 
                struct page *page = pages[i];
                struct zone *pagezone;
 
-               if (PageReserved(page) || !put_page_testzero(page))
+               if (!put_page_testzero(page))
                        continue;
 
                pagezone = page_zone(page);
Index: linux-2.6/mm/fremap.c
===================================================================
--- linux-2.6.orig/mm/fremap.c
+++ linux-2.6/mm/fremap.c
@@ -29,18 +29,21 @@ static inline void zap_pte(struct mm_str
                return;
        if (pte_present(pte)) {
                unsigned long pfn = pte_pfn(pte);
+               struct page *page;
 
                flush_cache_page(vma, addr, pfn);
                pte = ptep_clear_flush(vma, addr, ptep);
-               if (pfn_valid(pfn)) {
-                       struct page *page = pfn_to_page(pfn);
-                       if (!PageReserved(page)) {
-                               if (pte_dirty(pte))
-                                       set_page_dirty(page);
-                               page_remove_rmap(page);
-                               page_cache_release(page);
-                               dec_mm_counter(mm, rss);
-                       }
+               if (unlikely(!pfn_valid(pfn))) {
+                       invalid_pfn(pte, vma->vm_flags, addr);
+                       return;
+               }
+               page = pfn_to_page(pfn);
+               if (page != ZERO_PAGE(addr)) {
+                       if (pte_dirty(pte))
+                               set_page_dirty(page);
+                       page_remove_rmap(page);
+                       dec_mm_counter(mm, rss);
+                       page_cache_release(page);
                }
        } else {
                if (!pte_file(pte))
@@ -65,6 +68,8 @@ int install_page(struct mm_struct *mm, s
        pgd_t *pgd;
        pte_t pte_val;
 
+       BUG_ON(vma->vm_flags & VM_RESERVED);
+
        pgd = pgd_offset(mm, addr);
        spin_lock(&mm->page_table_lock);
        
@@ -122,6 +127,8 @@ int install_file_pte(struct mm_struct *m
        pgd_t *pgd;
        pte_t pte_val;
 
+       BUG_ON(vma->vm_flags & VM_RESERVED);
+
        pgd = pgd_offset(mm, addr);
        spin_lock(&mm->page_table_lock);
        
Index: linux-2.6/mm/msync.c
===================================================================
--- linux-2.6.orig/mm/msync.c
+++ linux-2.6/mm/msync.c
@@ -37,11 +37,11 @@ static void sync_pte_range(struct vm_are
                if (!pte_maybe_dirty(*pte))
                        continue;
                pfn = pte_pfn(*pte);
-               if (!pfn_valid(pfn))
+               if (unlikely(!pfn_valid(pfn))) {
+                       invalid_pfn(*pte, vma->vm_flags, addr);
                        continue;
+               }
                page = pfn_to_page(pfn);
-               if (PageReserved(page))
-                       continue;
 
                if (ptep_clear_flush_dirty(vma, addr, pte) ||
                    page_test_and_clear_dirty(page))
@@ -149,6 +149,9 @@ static int msync_interval(struct vm_area
        if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
                return -EBUSY;
 
+       if (vma->vm_flags & VM_RESERVED)
+               return -EINVAL;
+
        if (file && (vma->vm_flags & VM_SHARED)) {
                filemap_sync(vma, addr, end);
 
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -442,8 +442,6 @@ int page_referenced(struct page *page, i
 void page_add_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
 {
-       BUG_ON(PageReserved(page));
-
        inc_mm_counter(vma->vm_mm, anon_rss);
 
        if (atomic_inc_and_test(&page->_mapcount)) {
@@ -469,8 +467,7 @@ void page_add_anon_rmap(struct page *pag
 void page_add_file_rmap(struct page *page)
 {
        BUG_ON(PageAnon(page));
-       if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
-               return;
+       BUG_ON(!pfn_valid(page_to_pfn(page)));
 
        if (atomic_inc_and_test(&page->_mapcount))
                inc_page_state(nr_mapped);
@@ -484,8 +481,6 @@ void page_add_file_rmap(struct page *pag
  */
 void page_remove_rmap(struct page *page)
 {
-       BUG_ON(PageReserved(page));
-
        if (atomic_add_negative(-1, &page->_mapcount)) {
                BUG_ON(page_mapcount(page) < 0);
                /*
@@ -643,13 +638,13 @@ static void try_to_unmap_cluster(unsigne
                        continue;
 
                pfn = pte_pfn(*pte);
-               if (!pfn_valid(pfn))
+               if (unlikely(!pfn_valid(pfn))) {
+                       invalid_pfn(*pte, vma->vm_flags, address);
                        continue;
+               }
 
                page = pfn_to_page(pfn);
                BUG_ON(PageAnon(page));
-               if (PageReserved(page))
-                       continue;
 
                if (ptep_clear_flush_young(vma, address, pte))
                        continue;
@@ -812,7 +807,6 @@ int try_to_unmap(struct page *page)
 {
        int ret;
 
-       BUG_ON(PageReserved(page));
        BUG_ON(!PageLocked(page));
 
        if (PageAnon(page))
Index: linux-2.6/drivers/scsi/sg.c
===================================================================
--- linux-2.6.orig/drivers/scsi/sg.c
+++ linux-2.6/drivers/scsi/sg.c
@@ -1887,13 +1887,17 @@ st_unmap_user_pages(struct scatterlist *
        int i;
 
        for (i=0; i < nr_pages; i++) {
-               if (dirtied && !PageReserved(sgl[i].page))
-                       SetPageDirty(sgl[i].page);
-               /* unlock_page(sgl[i].page); */
+               struct page *page = sgl[i].page;
+
+               /* XXX: just for debug. Remove when PageReserved is removed */
+               BUG_ON(PageReserved(page));
+               if (dirtied)
+                       SetPageDirty(page);
+               /* unlock_page(page); */
                /* FIXME: cache flush missing for rw==READ
                 * FIXME: call the correct reference counting function
                 */
-               page_cache_release(sgl[i].page);
+               page_cache_release(page);
        }
 
        return 0;
Index: linux-2.6/drivers/scsi/st.c
===================================================================
--- linux-2.6.orig/drivers/scsi/st.c
+++ linux-2.6/drivers/scsi/st.c
@@ -4431,12 +4431,16 @@ static int sgl_unmap_user_pages(struct s
        int i;
 
        for (i=0; i < nr_pages; i++) {
-               if (dirtied && !PageReserved(sgl[i].page))
-                       SetPageDirty(sgl[i].page);
+               struct page *page = sgl[i].page;
+
+               /* XXX: just for debug. Remove when PageReserved is removed */
+               BUG_ON(PageReserved(page));
+               if (dirtied)
+                       SetPageDirty(page);
                /* FIXME: cache flush missing for rw==READ
                 * FIXME: call the correct reference counting function
                 */
-               page_cache_release(sgl[i].page);
+               page_cache_release(page);
        }
 
        return 0;
Index: linux-2.6/sound/core/pcm_native.c
===================================================================
--- linux-2.6.orig/sound/core/pcm_native.c
+++ linux-2.6/sound/core/pcm_native.c
@@ -2944,8 +2944,7 @@ static struct page * snd_pcm_mmap_status
                return NOPAGE_OOM;
        runtime = substream->runtime;
        page = virt_to_page(runtime->status);
-       if (!PageReserved(page))
-               get_page(page);
+       get_page(page);
        if (type)
                *type = VM_FAULT_MINOR;
        return page;
@@ -2987,8 +2986,7 @@ static struct page * snd_pcm_mmap_contro
                return NOPAGE_OOM;
        runtime = substream->runtime;
        page = virt_to_page(runtime->control);
-       if (!PageReserved(page))
-               get_page(page);
+       get_page(page);
        if (type)
                *type = VM_FAULT_MINOR;
        return page;
@@ -3061,8 +3059,7 @@ static struct page *snd_pcm_mmap_data_no
                vaddr = runtime->dma_area + offset;
                page = virt_to_page(vaddr);
        }
-       if (!PageReserved(page))
-               get_page(page);
+       get_page(page);
        if (type)
                *type = VM_FAULT_MINOR;
        return page;
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c
+++ linux-2.6/mm/mmap.c
@@ -1077,6 +1077,17 @@ munmap_back:
                error = file->f_op->mmap(file, vma);
                if (error)
                        goto unmap_and_free_vma;
+               if ((vma->vm_flags & (VM_SHARED | VM_WRITE | VM_RESERVED))
+                                               == (VM_WRITE | VM_RESERVED)) {
+                       printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
+                               "PROT_WRITE mmap of VM_RESERVED memory, which "
+                               "is deprecated. Please report this to "
+                               "[EMAIL PROTECTED]",current->comm);
+                       if (vma->vm_ops && vma->vm_ops->close)
+                               vma->vm_ops->close(vma);
+                       error = -EACCES;
+                       goto unmap_and_free_vma;
+               }
        } else if (vm_flags & VM_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
Index: linux-2.6/mm/mprotect.c
===================================================================
--- linux-2.6.orig/mm/mprotect.c
+++ linux-2.6/mm/mprotect.c
@@ -131,6 +131,14 @@ mprotect_fixup(struct vm_area_struct *vm
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
+               if (oldflags & VM_RESERVED) {
+                       BUG_ON(oldflags & VM_WRITE);
+                       printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
+                               "PROT_WRITE mprotect of VM_RESERVED memory, "
+                               "which is deprecated. Please report this to "
+                               "[EMAIL PROTECTED]",current->comm);
+                       return -EACCES;
+               }
        }
 
        newprot = protection_map[newflags & 0xf];
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -297,6 +297,7 @@ static unsigned long __init free_all_boo
                                if (j + 16 < BITS_PER_LONG)
                                        prefetchw(page + j + 16);
                                __ClearPageReserved(page + j);
+                               set_page_count(page + j, 0);
                        }
                        __free_pages(page, order);
                        i += BITS_PER_LONG;
Index: linux-2.6/mm/mempolicy.c
===================================================================
--- linux-2.6.orig/mm/mempolicy.c
+++ linux-2.6/mm/mempolicy.c
@@ -253,8 +253,10 @@ static int check_pte_range(struct mm_str
                if (!pte_present(*pte))
                        continue;
                pfn = pte_pfn(*pte);
-               if (!pfn_valid(pfn))
+               if (unlikely(!pfn_valid(pfn))) {
+                       invalid_pfn(*pte, -1UL, addr);
                        continue;
+               }
                nid = pfn_to_nid(pfn);
                if (!test_bit(nid, nodes))
                        break;
@@ -326,6 +328,8 @@ check_range(struct mm_struct *mm, unsign
        first = find_vma(mm, start);
        if (!first)
                return ERR_PTR(-EFAULT);
+       if (first->vm_flags & VM_RESERVED)
+               return ERR_PTR(-EACCES);
        prev = NULL;
        for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
                if (!vma->vm_next && vma->vm_end < end)
Index: linux-2.6/arch/ppc64/kernel/vdso.c
===================================================================
--- linux-2.6.orig/arch/ppc64/kernel/vdso.c
+++ linux-2.6/arch/ppc64/kernel/vdso.c
@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(str
                return NOPAGE_SIGBUS;
 
        /*
-        * Last page is systemcfg, special handling here, no get_page() a
-        * this is a reserved page
+        * Last page is systemcfg.
         */
        if ((vma->vm_end - address) <= PAGE_SIZE)
-               return virt_to_page(systemcfg);
+               pg = virt_to_page(systemcfg);
+       else
+               pg = virt_to_page(vbase + offset);
 
-       pg = virt_to_page(vbase + offset);
        get_page(pg);
        DBG(" ->page count: %d\n", page_count(pg));
 
@@ -600,6 +600,8 @@ void __init vdso_init(void)
                ClearPageReserved(pg);
                get_page(pg);
        }
+
+       get_page(virt_to_page(systemcfg));
 }
 
 int in_gate_area_no_task(unsigned long addr)
Index: linux-2.6/kernel/power/swsusp.c
===================================================================
--- linux-2.6.orig/kernel/power/swsusp.c
+++ linux-2.6/kernel/power/swsusp.c
@@ -434,15 +434,23 @@ static int save_highmem_zone(struct zone
                        continue;
                page = pfn_to_page(pfn);
                /*
-                * This condition results from rvmalloc() sans vmalloc_32()
-                * and architectural memory reservations. This should be
-                * corrected eventually when the cases giving rise to this
-                * are better understood.
+                * PageReserved results from rvmalloc() sans vmalloc_32()
+                * and architectural memory reservations.
+                *
+                * rvmalloc should not cause this, because all implementations
+                * appear to always be using vmalloc_32 on architectures with
+                * highmem. This is a good thing, because we would like to save
+                * rvmalloc pages.
+                *
+                * It appears to be triggered by pages which do not point to
+                * valid memory (see arch/i386/mm/init.c:one_highpage_init(),
+                * which sets PageReserved if the page does not point to valid
+                * RAM).
+                *
+                * XXX: must remove usage of PageReserved!
                 */
-               if (PageReserved(page)) {
-                       printk("highmem reserved page?!\n");
+               if (PageReserved(page))
                        continue;
-               }
                BUG_ON(PageNosave(page));
                if (PageNosaveFree(page))
                        continue;
@@ -528,10 +536,9 @@ static int saveable(struct zone * zone, 
                return 0;
 
        page = pfn_to_page(pfn);
-       BUG_ON(PageReserved(page) && PageNosave(page));
        if (PageNosave(page))
                return 0;
-       if (PageReserved(page) && pfn_is_nosave(pfn)) {
+       if (pfn_is_nosave(pfn)) {
                pr_debug("[nosave pfn 0x%lx]", pfn);
                return 0;
        }
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c
+++ linux-2.6/mm/shmem.c
@@ -1523,7 +1523,8 @@ static void do_shmem_file_read(struct fi
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
 
-               page_cache_release(page);
+               if (page != ZERO_PAGE(0))
+                       page_cache_release(page);
                if (ret != nr || !desc->count)
                        break;
 
