Below is the description I sent a couple of days back in a mail about the stale data issue on IA-64.
Seth, Rohit <> wrote on Tuesday, March 15, 2005 9:44 AM:

> Recently on IA-64, we have found an issue where old data could be
> used by apps.  The sequence of operations, which includes a few
> mprotects from user space (glibc), goes like this:
>
> 1- The text region of an executable is mmaped using
> PROT_READ|PROT_EXEC.  As a result, a shared page is allocated to the
> user.
>
> 2- The user then requests the text region to be mprotected with
> PROT_READ|PROT_WRITE.  The kernel removes the execute permission and
> leaves the read permission on the text region.
>
> 3- A subsequent write operation by the user results in a page fault,
> eventually resulting in a COW break.  The user gets a new private
> copy of the page.  At this point the kernel marks the new page for
> deferred flush.
>
> 4- The user then requests the text region to be mprotected back to
> PROT_READ|PROT_EXEC.  The mprotect support code in the kernel flushes
> the caches, updates the PTEs and then flushes the TLBs.  However,
> after updating the PTEs with the new permissions, we don't let the
> arch-specific code know about the new mappings (through an
> update_mmu_cache-like routine).  IA-64 typically uses
> update_mmu_cache to check for the deferred flush flag (which got set
> in step 3) to maintain cache coherency lazily (the local I and D
> caches on IA-64 are incoherent).
>
> DavidM suggested that we would need to add a hook in the function
> change_pte_range in mm/mprotect.c.  This would let the
> architecture-specific code look at the new PTEs to decide if it
> needs to update any other architectural/kernel state based on the
> updated (new permissions) PTE values.

We have added a new hook, lazy_mmu_prot_update(pte_t), that gets called
when the protection bits in PTEs change.  This hook gives arch-specific
code an opportunity to do whatever is needed.  On IA-64 it will be used
for lazily making the I and D caches coherent.
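For reference, a minimal user-space sketch of the failing sequence
(steps 1 through 4 above).  The file mapped and the byte written are
placeholders; the point is the mprotect/COW/mprotect dance:

/* reproducer sketch -- target file and the write are illustrative only */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	int fd = open("/bin/true", O_RDONLY);	/* any executable file */
	if (fd < 0)
		return 1;

	/* step 1: text mapped PROT_READ|PROT_EXEC (shared page) */
	char *text = mmap(NULL, pgsz, PROT_READ | PROT_EXEC,
			  MAP_PRIVATE, fd, 0);
	if (text == MAP_FAILED)
		return 1;

	/* step 2: exec permission dropped, write permission added */
	mprotect(text, pgsz, PROT_READ | PROT_WRITE);

	/* step 3: the write faults; the COW break gives a private copy
	 * that the kernel marks for deferred i-cache flush */
	text[0] ^= 1;

	/* step 4: protections restored; without the new hook the
	 * deferred flush never happens, so the i-cache may still hold
	 * the pre-write instructions */
	mprotect(text, pgsz, PROT_READ | PROT_EXEC);

	munmap(text, pgsz);
	close(fd);
	return 0;
}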
Signed-off-by: David Mosberger <[EMAIL PROTECTED]>
Signed-off-by: Rohit Seth <[EMAIL PROTECTED]>

Validated on IA-64 and x86 platforms.

diff -Naru linux-2.6.11/include/asm-generic/pgtable.h linux-2.6.11.new/include/asm-generic/pgtable.h
--- linux-2.6.11/include/asm-generic/pgtable.h	2005-03-01 23:37:54.000000000 -0800
+++ linux-2.6.11.new/include/asm-generic/pgtable.h	2005-03-17 21:12:21.181174495 -0800
@@ -134,4 +134,7 @@
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #endif
 
+#ifndef __HAVE_ARCH_LAZY_MMU_UPDATE
+#define lazy_mmu_prot_update(pte)	do { } while (0)
+#endif
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff -Naru linux-2.6.11/include/asm-ia64/pgtable.h linux-2.6.11.new/include/asm-ia64/pgtable.h
--- linux-2.6.11/include/asm-ia64/pgtable.h	2005-03-01 23:37:53.000000000 -0800
+++ linux-2.6.11.new/include/asm-ia64/pgtable.h	2005-03-18 02:41:24.202417018 -0800
@@ -420,6 +420,7 @@
 {
 	return pte_val(a) == pte_val(b);
 }
+#define update_mmu_cache(vma,address,pte) do { } while (0)
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 extern void paging_init (void);
@@ -482,7 +483,7 @@
  * information.  However, we use this routine to take care of any (delayed) i-cache
  * flushing that may be necessary.
  */
-extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte);
+extern void lazy_mmu_prot_update (pte_t pte);
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
@@ -561,6 +562,7 @@
 #define __HAVE_ARCH_PTEP_MKDIRTY
 #define __HAVE_ARCH_PTE_SAME
 #define __HAVE_ARCH_PGD_OFFSET_GATE
+#define __HAVE_ARCH_LAZY_MMU_UPDATE
 
 #include <asm-generic/pgtable.h>
 #include <asm-generic/pgtable-nopud.h>
diff -Naru linux-2.6.11/mm/memory.c linux-2.6.11.new/mm/memory.c
--- linux-2.6.11/mm/memory.c	2005-03-01 23:38:08.000000000 -0800
+++ linux-2.6.11.new/mm/memory.c	2005-03-18 01:49:05.498353905 -0800
@@ -1252,6 +1252,7 @@
 			      vma);
 	ptep_establish(vma, address, page_table, entry);
 	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 }
 
 /*
@@ -1304,6 +1305,7 @@
 				      vma);
 		ptep_set_access_flags(vma, address, page_table, entry, 1);
 		update_mmu_cache(vma, address, entry);
+		lazy_mmu_prot_update(entry);
 		pte_unmap(page_table);
 		spin_unlock(&mm->page_table_lock);
 		return VM_FAULT_MINOR;
@@ -1829,6 +1831,7 @@
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
+	lazy_mmu_prot_update(entry);
 	spin_unlock(&mm->page_table_lock);
 out:
 	return VM_FAULT_MINOR;
@@ -1956,6 +1959,7 @@
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 	spin_unlock(&mm->page_table_lock);
 out:
 	return ret;
@@ -2050,6 +2054,7 @@
 	entry = pte_mkyoung(entry);
 	ptep_set_access_flags(vma, address, pte, entry, write_access);
 	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 	pte_unmap(pte);
 	spin_unlock(&mm->page_table_lock);
 	return VM_FAULT_MINOR;
--- linux-2.6.11/arch/ia64/hp/common/sba_iommu.c	2005-03-01 23:37:49.000000000 -0800
+++ linux-2.6.11.new/arch/ia64/hp/common/sba_iommu.c	2005-03-18 02:55:29.661391037 -0800
@@ -761,7 +761,7 @@
 #ifdef ENABLE_MARK_CLEAN
 /**
  * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
  */
 static void
--- linux-2.6.11/arch/ia64/lib/swiotlb.c	2005-03-01 23:38:17.000000000 -0800
+++ linux-2.6.11.new/arch/ia64/lib/swiotlb.c	2005-03-18 02:55:58.480726621 -0800
@@ -444,7 +444,7 @@
 
 /*
  * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
  * flush them when they get mapped into an executable vm-area.
 */
 static void
--- linux-2.6.11/mm/mprotect.c	2005-03-01 23:38:37.000000000 -0800
+++ linux-2.6.11.new/mm/mprotect.c	2005-03-18 16:31:47.874658241 -0800
@@ -52,8 +52,9 @@
 			 * bits by wiping the pte and then setting the new pte
 			 * into place.
 			 */
-			entry = ptep_get_and_clear(pte);
-			set_pte(pte, pte_modify(entry, newprot));
+			entry = pte_modify(ptep_get_and_clear(pte), newprot);
+			set_pte(pte, entry);
+			lazy_mmu_prot_update(entry);
 		}
 		address += PAGE_SIZE;
 		pte++;
--- linux-2.6.11/Documentation/cachetlb.txt	2005-03-01 23:37:59.000000000 -0800
+++ linux-2.6.11.new/Documentation/cachetlb.txt	2005-03-18 19:07:29.565950055 -0800
@@ -142,6 +142,11 @@
 	The ia64 sn2 platform is one example of a platform
 	that uses this interface.
 
+8) void lazy_mmu_prot_update(pte_t pte)
+	This interface is called whenever the protection on
+	any user PTEs change.
+	This interface provides a notification to architecture
+	specific code to take appropriate action.
+
 Next, we have the cache flushing interfaces.  In general, when Linux
 is changing an existing virtual-->physical mapping to a new value,
--- linux-2.6.11/arch/ia64/mm/init.c	2005-03-01 23:38:07.000000000 -0800
+++ linux-2.6.11.new/arch/ia64/mm/init.c	2005-03-18 18:42:58.915577446 -0800
@@ -76,7 +76,7 @@
 }
 
 void
-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+lazy_mmu_prot_update (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
@@ -85,7 +85,7 @@
 		return;				/* not an executable page... */
 
 	page = pte_page(pte);
-	/* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
+
 	addr = (unsigned long) page_address(page);
 
 	if (test_bit(PG_arch_1, &page->flags))
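The init.c hunk is truncated above.  For reference, the function's body
follows the same deferred-flush logic that the old IA-64
update_mmu_cache() used: skip non-executable pages, and flush the
i-cache only for pages not yet marked clean via PG_arch_1.  A sketch of
that logic, not the verbatim remainder of the hunk:

/* sketch only: modeled on the pre-patch IA-64 update_mmu_cache() */
void
lazy_mmu_prot_update (pte_t pte)
{
	unsigned long addr;
	struct page *page;

	if (!pte_exec(pte))
		return;				/* not an executable page... */

	page = pte_page(pte);
	addr = (unsigned long) page_address(page);

	if (test_bit(PG_arch_1, &page->flags))
		return;				/* i-cache already coherent with d-cache */

	/* make the i-cache coherent and remember that this page is clean */
	flush_icache_range(addr, addr + PAGE_SIZE);
	set_bit(PG_arch_1, &page->flags);
}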