The reference counting in change_page_attr() broke in two cases: when it
was called with the default attributes that pages in the direct mapping
already have (i.e. the page's attributes were already set to the default),
and when changing a page's attributes from one non-default value to
another. This led either to premature restoration of a large page or to
missing the opportunity to restore one.

At the same time, make __PHYSICAL_MASK_SHIFT on 64-bits the value it
architecturally ought to have.

Signed-off-by: Jan Beulich <[EMAIL PROTECTED]>
Cc: Andi Kleen <[EMAIL PROTECTED]>

 arch/x86/mm/ioremap_64.c     |    4 +-
 arch/x86/mm/pageattr_32.c    |   84 +++++++++++++++++++++++++++++--------------
 arch/x86/mm/pageattr_64.c    |   57 +++++++++++++++++++----------
 include/asm-x86/page_32.h    |   10 +++++
 include/asm-x86/page_64.h    |    2 -
 include/asm-x86/pgtable_32.h |    3 +
 include/asm-x86/pgtable_64.h |    4 +-
 7 files changed, 114 insertions(+), 50 deletions(-)

--- linux-2.6.24-rc5/arch/x86/mm/ioremap_64.c   2007-12-12 11:28:18.000000000 
+0100
+++ 2.6.24-rc5-x86-change_page_attr/arch/x86/mm/ioremap_64.c    2007-12-04 
16:01:11.000000000 +0100
@@ -48,7 +48,7 @@ ioremap_change_attr(unsigned long phys_a
                 * Must use a address here and not struct page because the phys 
addr
                 * can be a in hole between nodes and not have an memmap entry.
                 */
-               err = 
change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
+               err = 
change_page_attr_addr(vaddr,npages,MAKE_GLOBAL(__PAGE_KERNEL|flags));
                if (!err)
                        global_flush_tlb();
        }
@@ -199,7 +199,7 @@ void iounmap(volatile void __iomem *addr
 
        /* Reset the direct mapping. Can block */
        if (p->flags >> 20)
-               ioremap_change_attr(p->phys_addr, p->size, 0);
+               ioremap_change_attr(p->phys_addr, get_vm_area_size(p), 0);
 
        /* Finally remove it */
        o = remove_vm_area((void *)addr);
--- linux-2.6.24-rc5/arch/x86/mm/pageattr_32.c  2007-12-12 11:28:18.000000000 
+0100
+++ 2.6.24-rc5-x86-change_page_attr/arch/x86/mm/pageattr_32.c   2007-12-04 
16:01:11.000000000 +0100
@@ -116,24 +116,22 @@ static void set_pmd_pte(pte_t *kpte, uns
        spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+static pgprot_t _ref_prot[KERNEL_PGD_PTRS * PTRS_PER_PMD];
+#define ref_prot(addr) _ref_prot[__pa(addr) >> PMD_SHIFT]
+
 /* 
  * No more special protections in this 2/4MB area - revert to a
  * large page again. 
  */
 static inline void revert_page(struct page *kpte_page, unsigned long address)
 {
-       pgprot_t ref_prot;
        pte_t *linear;
 
-       ref_prot =
-       ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-               ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
-
        linear = (pte_t *)
                pmd_offset(pud_offset(pgd_offset_k(address), address), address);
        set_pmd_pte(linear,  address,
-                   pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
-                           ref_prot));
+                   pte_mkhuge(pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> 
PAGE_SHIFT,
+                                      ref_prot(address))));
 }
 
 static inline void save_page(struct page *kpte_page)
@@ -142,12 +140,22 @@ static inline void save_page(struct page
                list_add(&kpte_page->lru, &df_list);
 }
 
+static inline int pgprot_match(pgprot_t prot1, pgprot_t prot2)
+{
+       return !((pgprot_val(prot1) ^ pgprot_val(prot2))
+#ifdef CONFIG_X86_PAE
+                & __supported_pte_mask
+#endif
+                & ~(_PAGE_ACCESSED|_PAGE_DIRTY));
+}
+
 static int
 __change_page_attr(struct page *page, pgprot_t prot)
 { 
        pte_t *kpte; 
        unsigned long address;
        struct page *kpte_page;
+       pgprot_t old_prot, ref_prot;
 
        BUG_ON(PageHighMem(page));
        address = (unsigned long)page_address(page);
@@ -159,29 +167,31 @@ __change_page_attr(struct page *page, pg
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));
 
-       if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
+       old_prot = pte_pgprot(pte_clrhuge(*kpte));
+       ref_prot = ref_prot(address);
+       if (!pgprot_match(prot, ref_prot)) {
                if (!pte_huge(*kpte)) {
                        set_pte_atomic(kpte, mk_pte(page, prot)); 
                } else {
-                       pgprot_t ref_prot;
-                       struct page *split;
-
-                       ref_prot =
-                       ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-                               ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
-                       split = split_large_page(address, prot, ref_prot);
-                       if (!split)
+                       BUG_ON(!pgprot_match(old_prot, ref_prot));
+                       kpte_page = split_large_page(address, prot, ref_prot);
+                       if (!kpte_page)
                                return -ENOMEM;
-                       set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
-                       kpte_page = split;
+                       set_pmd_pte(kpte, address,
+                                   mk_pte(kpte_page, PAGE_KERNEL_EXEC));
+               }
+               if (!PageReserved(kpte_page)
+                   && pgprot_match(old_prot, ref_prot))
+                       page_private(kpte_page)++;
+       } else if (!pgprot_match(ref_prot, old_prot)) {
+               BUG_ON(pte_huge(*kpte));
+               set_pte_atomic(kpte, mk_pte(page, ref_prot));
+               if (!PageReserved(kpte_page)) {
+                       BUG_ON(page_private(kpte_page) == 0);
+                       page_private(kpte_page)--;
                }
-               page_private(kpte_page)++;
-       } else if (!pte_huge(*kpte)) {
-               set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
-               BUG_ON(page_private(kpte_page) == 0);
-               page_private(kpte_page)--;
        } else
-               BUG();
+               return 0;
 
        /*
         * If the pte was reserved, it means it was created at boot
@@ -190,8 +200,17 @@ __change_page_attr(struct page *page, pg
         */
 
        save_page(kpte_page);
-       if (!PageReserved(kpte_page)) {
-               if (cpu_has_pse && (page_private(kpte_page) == 0)) {
+       if (!PageReserved(kpte_page) && cpu_has_pse) {
+               if (page_private(kpte_page) == PTRS_PER_PTE) {
+                       unsigned i;
+
+                       kpte = page_address(kpte_page);
+                       for (i = 0; i < PTRS_PER_PTE; ++i, ++kpte)
+                               if (pgprot_match(pte_pgprot(*kpte), prot))
+                                       page_private(kpte_page)--;
+                       ref_prot(address) = prot;
+               }
+               if (page_private(kpte_page) == 0) {
                        paravirt_release_pt(page_to_pfn(kpte_page));
                        revert_page(kpte_page, address);
                }
@@ -222,8 +241,21 @@ int change_page_attr(struct page *page, 
        int err = 0; 
        int i; 
        unsigned long flags;
+       static char first = 1;
 
        spin_lock_irqsave(&cpa_lock, flags);
+
+       if (unlikely(first)) {
+               unsigned long addr = PAGE_OFFSET & PMD_MASK;
+
+               /* This must match is_kernel_text(). */
+               for (; addr <= (unsigned long)__init_end; addr += PMD_SIZE)
+                       ref_prot(addr) = PAGE_KERNEL_EXEC;
+               for (; addr > PAGE_OFFSET; addr += PMD_SIZE)
+                       ref_prot(addr) = PAGE_KERNEL;
+               first = 0;
+       }
+
        for (i = 0; i < numpages; i++, page++) { 
                err = __change_page_attr(page, prot);
                if (err) 
--- linux-2.6.24-rc5/arch/x86/mm/pageattr_64.c  2007-12-12 11:28:18.000000000 
+0100
+++ 2.6.24-rc5-x86-change_page_attr/arch/x86/mm/pageattr_64.c   2007-12-04 
16:01:11.000000000 +0100
@@ -98,8 +98,14 @@ static inline void save_page(struct page
                list_add(&fpage->lru, &deferred_pages);
 }
 
+/* protected by init_mm.mmap_sem */
+static pgprot_t kref_prot[] =  {
+       [0 ... (KERNEL_TEXT_SIZE - 1) >> PMD_SHIFT] = PAGE_KERNEL_EXEC
+};
+#define kref_prot(kaddr) kref_prot[((kaddr) - __START_KERNEL_map) >> PMD_SHIFT]
+
 /* 
- * No more special protections in this 2/4MB area - revert to a
+ * No more special protections in this 2MB area - revert to a
  * large page again. 
  */
 static void revert_page(unsigned long address, pgprot_t ref_prot)
@@ -122,55 +128,68 @@ static void revert_page(unsigned long ad
        set_pte((pte_t *)pmd, large_pte);
 }      
 
+static inline int pgprot_match(pgprot_t prot1, pgprot_t prot2)
+{
+       return !((pgprot_val(prot1) ^ pgprot_val(prot2))
+                & __supported_pte_mask & ~(_PAGE_ACCESSED|_PAGE_DIRTY));
+}
+
 static int
 __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
                                   pgprot_t ref_prot)
 { 
        pte_t *kpte; 
        struct page *kpte_page;
-       pgprot_t ref_prot2;
+       pgprot_t old_prot;
 
        kpte = lookup_address(address);
        if (!kpte) return 0;
-       kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+       kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));
-       if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
+       old_prot = pte_pgprot(pte_clrhuge(*kpte));
+       if (!pgprot_match(prot, ref_prot)) {
                if (!pte_huge(*kpte)) {
                        set_pte(kpte, pfn_pte(pfn, prot));
                } else {
-                       /*
-                        * split_large_page will take the reference for this
-                        * change_page_attr on the split page.
-                        */
-                       struct page *split;
-                       ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
-                       split = split_large_page(address, prot, ref_prot2);
-                       if (!split)
+                       BUG_ON(!pgprot_match(old_prot, ref_prot));
+                       kpte_page = split_large_page(address, prot, ref_prot);
+                       if (!kpte_page)
                                return -ENOMEM;
-                       pgprot_val(ref_prot2) &= ~_PAGE_NX;
-                       set_pte(kpte, mk_pte(split, ref_prot2));
-                       kpte_page = split;
+                       set_pte(kpte, mk_pte(kpte_page, PAGE_KERNEL_EXEC));
                }
-               page_private(kpte_page)++;
-       } else if (!pte_huge(*kpte)) {
+               if (pgprot_match(old_prot, ref_prot))
+                       page_private(kpte_page)++;
+       } else if (!pgprot_match(ref_prot, old_prot)) {
+               BUG_ON(pte_huge(*kpte));
                set_pte(kpte, pfn_pte(pfn, ref_prot));
                BUG_ON(page_private(kpte_page) == 0);
                page_private(kpte_page)--;
        } else
-               BUG();
+               return 0;
 
        /* on x86-64 the direct mapping set at boot is not using 4k pages */
        BUG_ON(PageReserved(kpte_page));
 
        save_page(kpte_page);
+       if (page_private(kpte_page) == PTRS_PER_PTE
+           && address >= __START_KERNEL_map
+           && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
+               unsigned i;
+
+               kpte = page_address(kpte_page);
+               for (i = 0; i < PTRS_PER_PTE; ++i, ++kpte)
+                       if (pgprot_match(pte_pgprot(*kpte), prot))
+                               page_private(kpte_page)--;
+               kref_prot(address) = ref_prot = prot;
+       }
        if (page_private(kpte_page) == 0)
                revert_page(address, ref_prot);
        return 0;
 } 
 
 /*
- * Change the page attributes of an page in the linear mapping.
+ * Change the page attributes of a page in the linear mapping.
  *
  * This should be used when a page is mapped with a different caching policy
  * than write-back somewhere - some CPUs do not like it when mappings with
--- linux-2.6.24-rc5/include/asm-x86/page_32.h  2007-12-12 11:29:30.000000000 
+0100
+++ 2.6.24-rc5-x86-change_page_attr/include/asm-x86/page_32.h   2007-12-04 
16:01:11.000000000 +0100
@@ -6,6 +6,16 @@
 #define PAGE_SIZE      (1UL << PAGE_SHIFT)
 #define PAGE_MASK      (~(PAGE_SIZE-1))
 
+#ifdef CONFIG_X86_PAE
+#define __PHYSICAL_MASK_SHIFT  52
+#define __PHYSICAL_MASK                ((1ULL << __PHYSICAL_MASK_SHIFT) - 1)
+#define PHYSICAL_PAGE_MASK     (~(PAGE_SIZE - 1ULL) & __PHYSICAL_MASK)
+#else
+#define __PHYSICAL_MASK_SHIFT  32
+#define __PHYSICAL_MASK                (~0UL)
+#define PHYSICAL_PAGE_MASK     (PAGE_MASK & __PHYSICAL_MASK)
+#endif
+
 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
 #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
 
--- linux-2.6.24-rc5/include/asm-x86/page_64.h  2007-12-12 11:29:30.000000000 
+0100
+++ 2.6.24-rc5-x86-change_page_attr/include/asm-x86/page_64.h   2007-12-04 
16:01:11.000000000 +0100
@@ -98,7 +98,7 @@ extern unsigned long phys_base;
 #define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
 /* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT  46
+#define __PHYSICAL_MASK_SHIFT  52
 #define __PHYSICAL_MASK                ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 
1)
 #define __VIRTUAL_MASK_SHIFT   48
 #define __VIRTUAL_MASK         ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
--- linux-2.6.24-rc5/include/asm-x86/pgtable_32.h       2007-12-12 
11:29:30.000000000 +0100
+++ 2.6.24-rc5-x86-change_page_attr/include/asm-x86/pgtable_32.h        
2007-12-04 16:01:11.000000000 +0100
@@ -228,11 +228,14 @@ static inline int pte_file(pte_t pte)             {
 static inline pte_t pte_mkclean(pte_t pte)     { (pte).pte_low &= 
~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)       { (pte).pte_low &= 
~_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_wrprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_RW; 
return pte; }
+static inline pte_t pte_clrhuge(pte_t pte)     { (pte).pte_low &= ~_PAGE_PSE; 
return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)     { (pte).pte_low |= _PAGE_DIRTY; 
return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)     { (pte).pte_low |= 
_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)     { (pte).pte_low |= _PAGE_RW; 
return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)      { (pte).pte_low |= _PAGE_PSE; 
return pte; }
 
+#define pte_pgprot(pte) (__pgprot(pte_val(pte) & ~PHYSICAL_PAGE_MASK))
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
--- linux-2.6.24-rc5/include/asm-x86/pgtable_64.h       2007-12-12 
11:29:30.000000000 +0100
+++ 2.6.24-rc5-x86-change_page_attr/include/asm-x86/pgtable_64.h        
2007-12-04 16:01:11.000000000 +0100
@@ -344,9 +344,9 @@ static inline int pmd_large(pmd_t pte) {
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
-#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+#define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT)
 #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
-#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
+#define PTE_FILE_MAX_BITS (64 - PAGE_SHIFT)
 
 /* PTE - Level 1 access. */
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to