In order to make informed placement and reclaim decisions, the page
cache allocation requires the shadow information of refaulting pages.

Every site that does a find_or_create()-style page cache allocation is
converted to pass the shadow page found in the faulting slot of the
radix tree to page_cache_alloc(), where it can be used in subsequent
patches to influence reclaim behavior.

Signed-off-by: Johannes Weiner <han...@cmpxchg.org>
---
 drivers/staging/lustre/lustre/llite/dir.c |  2 +-
 fs/btrfs/compression.c                    |  2 +-
 fs/cachefiles/rdwr.c                      | 13 +++++----
 fs/ceph/xattr.c                           |  2 +-
 fs/logfs/readwrite.c                      |  6 ++--
 fs/ntfs/file.c                            |  7 +++--
 fs/splice.c                               |  6 ++--
 include/linux/pagemap.h                   | 20 ++++++++------
 mm/filemap.c                              | 46 +++++++++++++++++--------------
 mm/readahead.c                            |  2 +-
 net/ceph/pagelist.c                       |  4 +--
 net/ceph/pagevec.c                        |  2 +-
 12 files changed, 61 insertions(+), 51 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 2ca8c45..ac63e4d 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -172,7 +172,7 @@ static int ll_dir_filler(void *_hash, struct page *page0)
                max_pages = 1;
        }
        for (npages = 1; npages < max_pages; npages++) {
-               page = page_cache_alloc_cold(inode->i_mapping);
+               page = page_cache_alloc_cold(inode->i_mapping, NULL);
                if (!page)
                        break;
                page_pool[npages] = page;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5ce2c0f..f23bb17 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -483,7 +483,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                }
 
                page = __page_cache_alloc(mapping_gfp_mask(mapping) &
-                                                               ~__GFP_FS);
+                                         ~__GFP_FS, page);
                if (!page)
                        break;
 
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index ebaff36..1b34a42 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -254,13 +254,13 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
        newpage = NULL;
 
        for (;;) {
-               backpage = find_get_page(bmapping, netpage->index);
-               if (backpage)
+               backpage = __find_get_page(bmapping, netpage->index);
+               if (backpage && !radix_tree_exceptional_entry(backpage))
                        goto backing_page_already_present;
 
                if (!newpage) {
                        newpage = __page_cache_alloc(cachefiles_gfp |
-                                                    __GFP_COLD);
+                                                    __GFP_COLD, backpage);
                        if (!newpage)
                                goto nomem_monitor;
                }
@@ -499,13 +499,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
                }
 
                for (;;) {
-                       backpage = find_get_page(bmapping, netpage->index);
-                       if (backpage)
+                       backpage = __find_get_page(bmapping, netpage->index);
+                       if (backpage && !radix_tree_exceptional_entry(backpage))
                                goto backing_page_already_present;
 
                        if (!newpage) {
                                newpage = __page_cache_alloc(cachefiles_gfp |
-                                                            __GFP_COLD);
+                                                            __GFP_COLD,
+                                                            backpage);
                                if (!newpage)
                                        goto nomem;
                        }
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index be661d8..a5d2b86 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -816,7 +816,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                        return -ENOMEM;
                err = -ENOMEM;
                for (i = 0; i < nr_pages; i++) {
-                       pages[i] = __page_cache_alloc(GFP_NOFS);
+                       pages[i] = __page_cache_alloc(GFP_NOFS, NULL);
                        if (!pages[i]) {
                                nr_pages = i;
                                goto out;
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9a59cba..67c669a 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -316,9 +316,9 @@ static struct page *logfs_get_write_page(struct inode *inode, u64 bix,
        int err;
 
 repeat:
-       page = find_get_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(GFP_NOFS);
+       page = __find_get_page(mapping, index);
+       if (!page || radix_tree_exceptional_entry(page)) {
+               page = __page_cache_alloc(GFP_NOFS, page);
                if (!page)
                        return NULL;
                err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c5670b8..7aee2d1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -413,10 +413,11 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
        BUG_ON(!nr_pages);
        err = nr = 0;
        do {
-               pages[nr] = find_lock_page(mapping, index);
-               if (!pages[nr]) {
+               pages[nr] = __find_lock_page(mapping, index);
+               if (!pages[nr] || radix_tree_exceptional_entry(pages[nr])) {
                        if (!*cached_page) {
-                               *cached_page = page_cache_alloc(mapping);
+                               *cached_page = page_cache_alloc(mapping,
+                                                               pages[nr]);
                                if (unlikely(!*cached_page)) {
                                        err = -ENOMEM;
                                        goto err_out;
diff --git a/fs/splice.c b/fs/splice.c
index 3b7ee65..edc54ae 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -353,12 +353,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                 * Page could be there, find_get_pages_contig() breaks on
                 * the first hole.
                 */
-               page = find_get_page(mapping, index);
-               if (!page) {
+               page = __find_get_page(mapping, index);
+               if (!page || radix_tree_exceptional_entry(page)) {
                        /*
                         * page didn't exist, allocate one.
                         */
-                       page = page_cache_alloc_cold(mapping);
+                       page = page_cache_alloc_cold(mapping, page);
                        if (!page)
                                break;
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index db3a78b..4b24236 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -228,28 +228,32 @@ static inline void page_unfreeze_refs(struct page *page, int count)
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+extern struct page *__page_cache_alloc(gfp_t gfp, struct page *shadow);
 #else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct page *__page_cache_alloc(gfp_t gfp, struct page *shadow)
 {
        return alloc_pages(gfp, 0);
 }
 #endif
 
-static inline struct page *page_cache_alloc(struct address_space *x)
+static inline struct page *page_cache_alloc(struct address_space *x,
+                                           struct page *shadow)
 {
-       return __page_cache_alloc(mapping_gfp_mask(x));
+       return __page_cache_alloc(mapping_gfp_mask(x), shadow);
 }
 
-static inline struct page *page_cache_alloc_cold(struct address_space *x)
+static inline struct page *page_cache_alloc_cold(struct address_space *x,
+                                                struct page *shadow)
 {
-       return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
+       return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD, shadow);
 }
 
-static inline struct page *page_cache_alloc_readahead(struct address_space *x)
+static inline struct page *page_cache_alloc_readahead(struct address_space *x,
+                                                     struct page *shadow)
 {
        return __page_cache_alloc(mapping_gfp_mask(x) |
-                                 __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
+                                 __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN,
+                                 shadow);
 }
 
 typedef int filler_t(void *, struct page *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 34b2f0b..d3e5578 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -538,7 +538,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
 
 #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct page *__page_cache_alloc(gfp_t gfp, struct page *shadow)
 {
        int n;
        struct page *page;
@@ -917,9 +917,9 @@ struct page *find_or_create_page(struct address_space *mapping,
        struct page *page;
        int err;
 repeat:
-       page = find_lock_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(gfp_mask);
+       page = __find_lock_page(mapping, index);
+       if (!page || radix_tree_exceptional_entry(page)) {
+               page = __page_cache_alloc(gfp_mask, page);
                if (!page)
                        return NULL;
                /*
@@ -1222,15 +1222,16 @@ EXPORT_SYMBOL(find_get_pages_tag);
 struct page *
 grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 {
-       struct page *page = find_get_page(mapping, index);
+       struct page *page = __find_get_page(mapping, index);
 
-       if (page) {
+       if (page && !radix_tree_exceptional_entry(page)) {
                if (trylock_page(page))
                        return page;
                page_cache_release(page);
                return NULL;
        }
-       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
+       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS,
+                                 page);
        if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
                page_cache_release(page);
                page = NULL;
@@ -1304,8 +1305,9 @@ find_page:
                        page_cache_sync_readahead(mapping,
                                        ra, filp,
                                        index, last_index - index);
-                       page = find_get_page(mapping, index);
-                       if (unlikely(page == NULL))
+                       page = __find_get_page(mapping, index);
+                       if (unlikely(page == NULL ||
+                                    radix_tree_exceptional_entry(page)))
                                goto no_cached_page;
                }
                if (PageReadahead(page)) {
@@ -1464,7 +1466,7 @@ no_cached_page:
                 * Ok, it wasn't cached, so we need to create a new
                 * page..
                 */
-               page = page_cache_alloc_cold(mapping);
+               page = page_cache_alloc_cold(mapping, page);
                if (!page) {
                        desc->error = -ENOMEM;
                        goto out;
@@ -1673,18 +1675,20 @@ EXPORT_SYMBOL(generic_file_aio_read);
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:      file to read
  * @offset:    page index
+ * @shadow:    shadow page of the page to be added
  *
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int page_cache_read(struct file *file, pgoff_t offset)
+static int page_cache_read(struct file *file, pgoff_t offset,
+                          struct page *shadow)
 {
        struct address_space *mapping = file->f_mapping;
        struct page *page; 
        int ret;
 
        do {
-               page = page_cache_alloc_cold(mapping);
+               page = page_cache_alloc_cold(mapping, shadow);
                if (!page)
                        return -ENOMEM;
 
@@ -1815,8 +1819,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
 retry_find:
-               page = find_get_page(mapping, offset);
-               if (!page)
+               page = __find_get_page(mapping, offset);
+               if (!page || radix_tree_exceptional_entry(page))
                        goto no_cached_page;
        }
 
@@ -1859,7 +1863,7 @@ no_cached_page:
         * We're only likely to ever get here if MADV_RANDOM is in
         * effect.
         */
-       error = page_cache_read(file, offset);
+       error = page_cache_read(file, offset, page);
 
        /*
         * The page we want has now been added to the page cache.
@@ -1981,9 +1985,9 @@ static struct page *__read_cache_page(struct address_space *mapping,
        struct page *page;
        int err;
 repeat:
-       page = find_get_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(gfp | __GFP_COLD);
+       page = __find_get_page(mapping, index);
+       if (!page || radix_tree_exceptional_entry(page)) {
+               page = __page_cache_alloc(gfp | __GFP_COLD, page);
                if (!page)
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, gfp);
@@ -2454,11 +2458,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
        if (flags & AOP_FLAG_NOFS)
                gfp_notmask = __GFP_FS;
 repeat:
-       page = find_lock_page(mapping, index);
-       if (page)
+       page = __find_lock_page(mapping, index);
+       if (page && !radix_tree_exceptional_entry(page))
                goto found;
 
-       page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
+       page = __page_cache_alloc(gfp_mask & ~gfp_notmask, page);
        if (!page)
                return NULL;
        status = add_to_page_cache_lru(page, mapping, index,
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f85996..58142ef 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -182,7 +182,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
                if (page && !radix_tree_exceptional_entry(page))
                        continue;
 
-               page = page_cache_alloc_readahead(mapping);
+               page = page_cache_alloc_readahead(mapping, page);
                if (!page)
                        break;
                page->index = page_offset;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 92866be..83fb56e 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -32,7 +32,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
        struct page *page;
 
        if (!pl->num_pages_free) {
-               page = __page_cache_alloc(GFP_NOFS);
+               page = __page_cache_alloc(GFP_NOFS, NULL);
        } else {
                page = list_first_entry(&pl->free_list, struct page, lru);
                list_del(&page->lru);
@@ -83,7 +83,7 @@ int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
        space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */
 
        while (space > pl->num_pages_free) {
-               struct page *page = __page_cache_alloc(GFP_NOFS);
+               struct page *page = __page_cache_alloc(GFP_NOFS, NULL);
                if (!page)
                        return -ENOMEM;
                list_add_tail(&page->lru, &pl->free_list);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a224..ff76422 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -79,7 +79,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
        if (!pages)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < num_pages; i++) {
-               pages[i] = __page_cache_alloc(flags);
+               pages[i] = __page_cache_alloc(flags, NULL);
                if (pages[i] == NULL) {
                        ceph_release_page_vector(pages, i);
                        return ERR_PTR(-ENOMEM);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to