Hi, I think there might be a problem, but don't take this as a final patch because I can make it nicer if we are agreed there is a problem.
One thing I like about it is that it ties in the anonymous page handling with the rest of the page management, by marking anon pages as uptodate when they _are_ uptodate, rather than when our implementation requires that they be marked as such. Doing this let me get rid of the smp_wmb's in the page copying functions, which always vaguely troubled me. Any comments? Index: linux-2.6/include/linux/highmem.h =================================================================== --- linux-2.6.orig/include/linux/highmem.h +++ linux-2.6/include/linux/highmem.h @@ -57,8 +57,6 @@ static inline void clear_user_highpage(s void *addr = kmap_atomic(page, KM_USER0); clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); - /* Make sure this page is cleared on other CPU's too before using it */ - smp_wmb(); } #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE @@ -108,8 +106,6 @@ static inline void copy_user_highpage(st copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); - /* Make sure this page is cleared on other CPU's too before using it */ - smp_wmb(); } #endif Index: linux-2.6/include/linux/page-flags.h =================================================================== --- linux-2.6.orig/include/linux/page-flags.h +++ linux-2.6/include/linux/page-flags.h @@ -127,15 +127,27 @@ #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) #define PageUptodate(page) test_bit(PG_uptodate, &(page)->flags) -#ifdef CONFIG_S390 static inline void SetPageUptodate(struct page *page) { +#ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) page_test_and_clear_dirty(page); -} #else -#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) + /* + * Memory barrier must be issued before setting the PG_uptodate bit, + * so all previous writes that served to bring the page uptodate are + * visible before PageUptodate becomes true. + * + * S390 is guaranteed to have a barrier in the test_and_set operation + * (see Documentation/atomic_ops.txt). + * + * XXX: does this memory barrier need to be anything special to + * handle things like DMA writes into the page? + */ + smp_wmb(); + set_bit(PG_uptodate, &(page)->flags); #endif +} #define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) #define PageDirty(page) test_bit(PG_dirty, &(page)->flags) Index: linux-2.6/mm/hugetlb.c =================================================================== --- linux-2.6.orig/mm/hugetlb.c +++ linux-2.6/mm/hugetlb.c @@ -443,6 +443,7 @@ static int hugetlb_cow(struct mm_struct spin_unlock(&mm->page_table_lock); copy_huge_page(new_page, old_page, address, vma); + SetPageUptodate(new_page); spin_lock(&mm->page_table_lock); ptep = huge_pte_offset(mm, address & HPAGE_MASK); @@ -506,6 +507,7 @@ retry: } else lock_page(page); } + SetPageUptodate(page); spin_lock(&mm->page_table_lock); size = i_size_read(mapping->host) >> HPAGE_SHIFT; Index: linux-2.6/mm/memory.c =================================================================== --- linux-2.6.orig/mm/memory.c +++ linux-2.6/mm/memory.c @@ -1463,10 +1463,8 @@ static inline void cow_user_page(struct memset(kaddr, 0, PAGE_SIZE); kunmap_atomic(kaddr, KM_USER0); flush_dcache_page(dst); - return; - - } - copy_user_highpage(dst, src, va, vma); + } else + copy_user_highpage(dst, src, va, vma); } /* @@ -1579,6 +1577,7 @@ gotten: goto oom; cow_user_page(new_page, old_page, address, vma); } + SetPageUptodate(new_page); /* * Re-check the pte - we dropped the lock @@ -2097,6 +2096,7 @@ static int do_anonymous_page(struct mm_s page = alloc_zeroed_user_highpage(vma, address); if (!page) goto oom; + SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -2203,6 +2203,7 @@ retry: copy_user_highpage(page, new_page, address, vma); page_cache_release(new_page); new_page = page; + SetPageUptodate(new_page); anon = 1; } else { Index: linux-2.6/mm/filemap.c =================================================================== --- linux-2.6.orig/mm/filemap.c +++ linux-2.6/mm/filemap.c @@ -935,6 +935,13 @@ find_page: if (!PageUptodate(page)) goto page_not_up_to_date; page_ok: + /* + * Must ensure that the data we read out of the page is loaded + * _after_ we've loaded page->flags to check for PageUptodate. + * See include/linux/page-flags.h:SetPageUptodate() for the + * other side of the story. + */ + smp_rmb(); /* If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing @@ -1422,6 +1429,11 @@ retry_find: success: /* + * Must order memory for the same reason as do_generic_mapping_read + */ + smp_rmb(); + + /* * Found the page and have a reference on it. */ mark_page_accessed(page); @@ -1564,6 +1576,11 @@ retry_find: success: /* + * Must order memory for the same reason as do_generic_mapping_read + */ + smp_rmb(); + + /* * Found the page and have a reference on it. */ mark_page_accessed(page); Index: linux-2.6/mm/page_io.c =================================================================== --- linux-2.6.orig/mm/page_io.c +++ linux-2.6/mm/page_io.c @@ -134,7 +134,7 @@ int swap_readpage(struct file *file, str int ret = 0; BUG_ON(!PageLocked(page)); - ClearPageUptodate(page); + BUG_ON(PageUptodate(page)); bio = get_swap_bio(GFP_KERNEL, page_private(page), page, end_swap_bio_read); if (bio == NULL) { Index: linux-2.6/mm/swap_state.c =================================================================== --- linux-2.6.orig/mm/swap_state.c +++ linux-2.6/mm/swap_state.c @@ -149,6 +149,7 @@ int add_to_swap(struct page * page, gfp_ int err; BUG_ON(!PageLocked(page)); + BUG_ON(!PageUptodate(page)); for (;;) { entry = get_swap_page(); @@ -171,7 +172,6 @@ int add_to_swap(struct page * page, gfp_ switch (err) { case 0: /* Success */ - SetPageUptodate(page); SetPageDirty(page); INC_CACHE_INFO(add_total); return 1; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/