In order to make informed placement and reclaim decisions, the page
allocator requires the eviction information of refaulting pages.

Every site that does a find_or_create()-style allocation is converted
to pass this value to the page_cache_alloc() family of functions,
which in turn pass it down to the page allocator.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 fs/btrfs/compression.c  |  7 +++--
 fs/cachefiles/rdwr.c    | 25 ++++++++++-------
 fs/ceph/xattr.c         |  2 +-
 fs/logfs/readwrite.c    |  9 ++++--
 fs/ntfs/file.c          | 10 +++++--
 fs/splice.c             |  9 +++---
 include/linux/gfp.h     | 18 +++++++-----
 include/linux/pagemap.h | 26 +++++++++++------
 include/linux/swap.h    |  6 ++++
 mm/filemap.c            | 74 ++++++++++++++++++++++++++++++-------------------
 mm/mempolicy.c          | 17 +++++++-----
 mm/page_alloc.c         | 51 +++++++++++++++++++---------------
 mm/readahead.c          |  6 ++--
 net/ceph/pagelist.c     |  4 +--
 net/ceph/pagevec.c      |  2 +-
 15 files changed, 163 insertions(+), 103 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4a80f6b..9c83b84 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -464,6 +464,8 @@ static noinline int add_ra_bio_pages(struct inode *inode,
        end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 
        while (last_offset < compressed_end) {
+               unsigned long distance;
+
                pg_index = last_offset >> PAGE_CACHE_SHIFT;
 
                if (pg_index > end_index)
@@ -478,12 +480,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                                break;
                        goto next;
                }
-
+               distance = workingset_refault_distance(page);
                page = __page_cache_alloc(mapping_gfp_mask(mapping) &
-                                                               ~__GFP_FS);
+                                         ~__GFP_FS, distance);
                if (!page)
                        break;
-
                if (add_to_page_cache_lru(page, mapping, pg_index,
                                                                GFP_NOFS)) {
                        page_cache_release(page);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 4809922..3d4a75a 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -12,6 +12,7 @@
 #include <linux/mount.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/swap.h>
 #include "internal.h"
 
 /*
@@ -256,17 +257,19 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
        newpage = NULL;
 
        for (;;) {
-               backpage = find_get_page(bmapping, netpage->index);
-               if (backpage)
-                       goto backing_page_already_present;
+               unsigned long distance;
 
+               backpage = __find_get_page(bmapping, netpage->index);
+               if (backpage && !radix_tree_exceptional_entry(backpage))
+                       goto backing_page_already_present;
+               distance = workingset_refault_distance(backpage);
                if (!newpage) {
                        newpage = __page_cache_alloc(cachefiles_gfp |
-                                                    __GFP_COLD);
+                                                    __GFP_COLD,
+                                                    distance);
                        if (!newpage)
                                goto nomem_monitor;
                }
-
                ret = add_to_page_cache(newpage, bmapping,
                                        netpage->index, cachefiles_gfp);
                if (ret == 0)
@@ -507,17 +510,19 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
                }
 
                for (;;) {
-                       backpage = find_get_page(bmapping, netpage->index);
-                       if (backpage)
-                               goto backing_page_already_present;
+                       unsigned long distance;
 
+                       backpage = __find_get_page(bmapping, netpage->index);
+                       if (backpage && !radix_tree_exceptional_entry(backpage))
+                               goto backing_page_already_present;
+                       distance = workingset_refault_distance(backpage);
                        if (!newpage) {
                                newpage = __page_cache_alloc(cachefiles_gfp |
-                                                            __GFP_COLD);
+                                                            __GFP_COLD,
+                                                            distance);
                                if (!newpage)
                                        goto nomem;
                        }
-
                        ret = add_to_page_cache(newpage, bmapping,
                                                netpage->index, cachefiles_gfp);
                        if (ret == 0)
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 9b6b2b6..d52c9f0 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -815,7 +815,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                        return -ENOMEM;
                err = -ENOMEM;
                for (i = 0; i < nr_pages; i++) {
-                       pages[i] = __page_cache_alloc(GFP_NOFS);
+                       pages[i] = __page_cache_alloc(GFP_NOFS, 0);
                        if (!pages[i]) {
                                nr_pages = i;
                                goto out;
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9a59cba..0c4535d 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -19,6 +19,7 @@
 #include "logfs.h"
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/swap.h>
 
 static u64 adjust_bix(u64 bix, level_t level)
 {
@@ -316,9 +317,11 @@ static struct page *logfs_get_write_page(struct inode *inode, u64 bix,
        int err;
 
 repeat:
-       page = find_get_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(GFP_NOFS);
+       page = __find_get_page(mapping, index);
+       if (!page || radix_tree_exceptional_entry(page)) {
+               unsigned long distance = workingset_refault_distance(page);
+
+               page = __page_cache_alloc(GFP_NOFS, distance);
                if (!page)
                        return NULL;
                err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5b2d4f0..a8a4e07 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -412,10 +412,14 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
        BUG_ON(!nr_pages);
        err = nr = 0;
        do {
-               pages[nr] = find_lock_page(mapping, index);
-               if (!pages[nr]) {
+               pages[nr] = __find_lock_page(mapping, index);
+               if (!pages[nr] || radix_tree_exceptional_entry(pages[nr])) {
+                       unsigned long distance;
+
+                       distance = workingset_refault_distance(pages[nr]);
                        if (!*cached_page) {
-                               *cached_page = page_cache_alloc(mapping);
+                               *cached_page = page_cache_alloc(mapping,
+                                                               distance);
                                if (unlikely(!*cached_page)) {
                                        err = -ENOMEM;
                                        goto err_out;
diff --git a/fs/splice.c b/fs/splice.c
index 29e394e..e60ddfc 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -352,15 +352,16 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                 * Page could be there, find_get_pages_contig() breaks on
                 * the first hole.
                 */
-               page = find_get_page(mapping, index);
-               if (!page) {
+               page = __find_get_page(mapping, index);
+               if (!page || radix_tree_exceptional_entry(page)) {
+                       unsigned long distance;
                        /*
                         * page didn't exist, allocate one.
                         */
-                       page = page_cache_alloc_cold(mapping);
+                       distance = workingset_refault_distance(page);
+                       page = page_cache_alloc_cold(mapping, distance);
                        if (!page)
                                break;
-
                        error = add_to_page_cache_lru(page, mapping, index,
                                                GFP_KERNEL);
                        if (unlikely(error)) {
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0f615eb..caf8d34 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -298,13 +298,16 @@ static inline void arch_alloc_page(struct page *page, int order) { }
 
 struct page *
 __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-                      struct zonelist *zonelist, nodemask_t *nodemask);
+                      struct zonelist *zonelist, nodemask_t *nodemask,
+                      unsigned long refault_distance);
 
 static inline struct page *
 __alloc_pages(gfp_t gfp_mask, unsigned int order,
-               struct zonelist *zonelist)
+             struct zonelist *zonelist, unsigned long refault_distance)
 {
-       return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
+       return __alloc_pages_nodemask(gfp_mask, order,
+                                     zonelist, NULL,
+                                     refault_distance);
 }
 
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
@@ -314,7 +317,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
        if (nid < 0)
                nid = numa_node_id();
 
-       return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
+       return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask), 0);
 }
 
 static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
@@ -322,16 +325,17 @@ static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
 {
        VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid));
 
-       return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
+       return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask), 0);
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order,
+                                       unsigned long refault_distance);
 
 static inline struct page *
 alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
-       return alloc_pages_current(gfp_mask, order);
+       return alloc_pages_current(gfp_mask, order, 0);
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                        struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 258eb38..d758243 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -228,28 +228,36 @@ static inline void page_unfreeze_refs(struct page *page, int count)
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+extern struct page *__page_cache_alloc(gfp_t gfp,
+                                      unsigned long refault_distance);
 #else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct page *__page_cache_alloc(gfp_t gfp,
+                                             unsigned long refault_distance)
 {
-       return alloc_pages(gfp, 0);
+       return __alloc_pages(gfp, 0, node_zonelist(numa_node_id(), gfp),
+                            refault_distance);
 }
 #endif
 
-static inline struct page *page_cache_alloc(struct address_space *x)
+static inline struct page *page_cache_alloc(struct address_space *x,
+                                           unsigned long refault_distance)
 {
-       return __page_cache_alloc(mapping_gfp_mask(x));
+       return __page_cache_alloc(mapping_gfp_mask(x), refault_distance);
 }
 
-static inline struct page *page_cache_alloc_cold(struct address_space *x)
+static inline struct page *page_cache_alloc_cold(struct address_space *x,
+                                                unsigned long refault_distance)
 {
-       return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
+       return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD,
+                                 refault_distance);
 }
 
-static inline struct page *page_cache_alloc_readahead(struct address_space *x)
+static inline struct page *page_cache_alloc_readahead(struct address_space *x,
+                                                     unsigned long refault_distance)
 {
        return __page_cache_alloc(mapping_gfp_mask(x) |
-                                 __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
+                                 __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN,
+                                 refault_distance);
 }
 
 typedef int filler_t(void *, struct page *);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2818a12..ffa323a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -221,6 +221,12 @@ struct swap_list_t {
        int next;       /* swapfile to be used next */
 };
 
+/* linux/mm/workingset.c */
+static inline unsigned long workingset_refault_distance(struct page *page)
+{
+       return ~0UL;
+}
+
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
diff --git a/mm/filemap.c b/mm/filemap.c
index dd0835e..10f8a62 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -518,7 +518,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
 
 #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct page *__page_cache_alloc(gfp_t gfp, unsigned long refault_distance)
 {
        int n;
        struct page *page;
@@ -528,12 +528,12 @@ struct page *__page_cache_alloc(gfp_t gfp)
                do {
                        cpuset_mems_cookie = get_mems_allowed();
                        n = cpuset_mem_spread_node();
-                       page = alloc_pages_exact_node(n, gfp, 0);
+                       page = __alloc_pages(gfp, 0, node_zonelist(n, gfp),
+                                            refault_distance);
                } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
-
-               return page;
-       }
-       return alloc_pages(gfp, 0);
+       } else
+               page = alloc_pages_current(gfp, 0, refault_distance);
+       return page;
 }
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
@@ -894,9 +894,11 @@ struct page *find_or_create_page(struct address_space *mapping,
        struct page *page;
        int err;
 repeat:
-       page = find_lock_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(gfp_mask);
+       page = __find_lock_page(mapping, index);
+       if (!page || radix_tree_exceptional_entry(page)) {
+               unsigned long distance = workingset_refault_distance(page);
+
+               page = __page_cache_alloc(gfp_mask, distance);
                if (!page)
                        return NULL;
                /*
@@ -1199,16 +1201,21 @@ EXPORT_SYMBOL(find_get_pages_tag);
 struct page *
 grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 {
-       struct page *page = find_get_page(mapping, index);
+       struct page *page = __find_get_page(mapping, index);
+       unsigned long distance;
 
-       if (page) {
+       if (page && !radix_tree_exceptional_entry(page)) {
                if (trylock_page(page))
                        return page;
                page_cache_release(page);
                return NULL;
        }
-       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
-       if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
+       distance = workingset_refault_distance(page);
+       page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS,
+                                 distance);
+       if (!page)
+               return NULL;
+       if (add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
                page_cache_release(page);
                page = NULL;
        }
@@ -1270,6 +1277,7 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
        offset = *ppos & ~PAGE_CACHE_MASK;
 
        for (;;) {
+               unsigned long distance;
                struct page *page;
                pgoff_t end_index;
                loff_t isize;
@@ -1282,8 +1290,9 @@ find_page:
                        page_cache_sync_readahead(mapping,
                                        ra, filp,
                                        index, last_index - index);
-                       page = find_get_page(mapping, index);
-                       if (unlikely(page == NULL))
+                       page = __find_get_page(mapping, index);
+                       if (unlikely(!page ||
+                                    radix_tree_exceptional_entry(page)))
                                goto no_cached_page;
                }
                if (PageReadahead(page)) {
@@ -1441,7 +1450,8 @@ no_cached_page:
                 * Ok, it wasn't cached, so we need to create a new
                 * page..
                 */
-               page = page_cache_alloc_cold(mapping);
+               distance = workingset_refault_distance(page);
+               page = page_cache_alloc_cold(mapping, distance);
                if (!page) {
                        desc->error = -ENOMEM;
                        goto out;
@@ -1650,21 +1660,22 @@ EXPORT_SYMBOL(generic_file_aio_read);
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:      file to read
  * @offset:    page index
+ * @distance:  refault distance
  *
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int page_cache_read(struct file *file, pgoff_t offset)
+static int page_cache_read(struct file *file, pgoff_t offset,
+                          unsigned long distance)
 {
        struct address_space *mapping = file->f_mapping;
        struct page *page; 
        int ret;
 
        do {
-               page = page_cache_alloc_cold(mapping);
+               page = page_cache_alloc_cold(mapping, distance);
                if (!page)
                        return -ENOMEM;
-
                ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
                if (ret == 0)
                        ret = mapping->a_ops->readpage(file, page);
@@ -1767,6 +1778,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
        pgoff_t offset = vmf->pgoff;
+       unsigned long distance;
        struct page *page;
        pgoff_t size;
        int ret = 0;
@@ -1792,8 +1804,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
 retry_find:
-               page = find_get_page(mapping, offset);
-               if (!page)
+               page = __find_get_page(mapping, offset);
+               if (!page || radix_tree_exceptional_entry(page))
                        goto no_cached_page;
        }
 
@@ -1836,7 +1848,8 @@ no_cached_page:
         * We're only likely to ever get here if MADV_RANDOM is in
         * effect.
         */
-       error = page_cache_read(file, offset);
+       distance = workingset_refault_distance(page);
+       error = page_cache_read(file, offset, distance);
 
        /*
         * The page we want has now been added to the page cache.
@@ -1958,9 +1971,11 @@ static struct page *__read_cache_page(struct address_space *mapping,
        struct page *page;
        int err;
 repeat:
-       page = find_get_page(mapping, index);
-       if (!page) {
-               page = __page_cache_alloc(gfp | __GFP_COLD);
+       page = __find_get_page(mapping, index);
+       if (!page || radix_tree_exceptional_entry(page)) {
+               unsigned long distance = workingset_refault_distance(page);
+
+               page = __page_cache_alloc(gfp | __GFP_COLD, distance);
                if (!page)
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, gfp);
@@ -2424,6 +2439,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
        gfp_t gfp_mask;
        struct page *page;
        gfp_t gfp_notmask = 0;
+       unsigned long distance;
 
        gfp_mask = mapping_gfp_mask(mapping);
        if (mapping_cap_account_dirty(mapping))
@@ -2431,11 +2447,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
        if (flags & AOP_FLAG_NOFS)
                gfp_notmask = __GFP_FS;
 repeat:
-       page = find_lock_page(mapping, index);
-       if (page)
+       page = __find_lock_page(mapping, index);
+       if (page && !radix_tree_exceptional_entry(page))
                goto found;
-
-       page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
+       distance = workingset_refault_distance(page);
+       page = __page_cache_alloc(gfp_mask & ~gfp_notmask, distance);
        if (!page)
                return NULL;
        status = add_to_page_cache_lru(page, mapping, index,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7431001..69f57b8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1944,13 +1944,14 @@ out:
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
-                                       unsigned nid)
+                                         unsigned nid,
+                                         unsigned long refault_distance)
 {
        struct zonelist *zl;
        struct page *page;
 
        zl = node_zonelist(nid, gfp);
-       page = __alloc_pages(gfp, order, zl);
+       page = __alloc_pages(gfp, order, zl, refault_distance);
        if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
                inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
        return page;
@@ -1996,7 +1997,7 @@ retry_cpuset:
 
                nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
                mpol_cond_put(pol);
-               page = alloc_page_interleave(gfp, order, nid);
+               page = alloc_page_interleave(gfp, order, nid, 0);
                if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
                        goto retry_cpuset;
 
@@ -2004,7 +2005,7 @@ retry_cpuset:
        }
        page = __alloc_pages_nodemask(gfp, order,
                                      policy_zonelist(gfp, pol, node),
-                                     policy_nodemask(gfp, pol));
+                                     policy_nodemask(gfp, pol), 0);
        if (unlikely(mpol_needs_cond_ref(pol)))
                __mpol_put(pol);
        if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
@@ -2031,7 +2032,8 @@ retry_cpuset:
  *     1) it's ok to take cpuset_sem (can WAIT), and
  *     2) allocating for current task (not interrupt).
  */
-struct page *alloc_pages_current(gfp_t gfp, unsigned order)
+struct page *alloc_pages_current(gfp_t gfp, unsigned order,
+                                unsigned long refault_distance)
 {
        struct mempolicy *pol = get_task_policy(current);
        struct page *page;
@@ -2048,11 +2050,12 @@ retry_cpuset:
         * nor system default_policy
         */
        if (pol->mode == MPOL_INTERLEAVE)
-               page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
+               page = alloc_page_interleave(gfp, order, interleave_nodes(pol),
+                                            refault_distance);
        else
                page = __alloc_pages_nodemask(gfp, order,
                                policy_zonelist(gfp, pol, numa_node_id()),
-                               policy_nodemask(gfp, pol));
+                               policy_nodemask(gfp, pol), refault_distance);
 
        if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
                goto retry_cpuset;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a64d786..92b4c01 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1842,7 +1842,8 @@ static inline void init_zone_allows_reclaim(int nid)
 static struct page *
 get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
                struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
-               struct zone *preferred_zone, int migratetype)
+               struct zone *preferred_zone, int migratetype,
+               unsigned long refault_distance)
 {
        struct zoneref *z;
        struct page *page = NULL;
@@ -2105,7 +2106,7 @@ static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, struct zone *preferred_zone,
-       int migratetype)
+       int migratetype, unsigned long refault_distance)
 {
        struct page *page;
 
@@ -2123,7 +2124,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
                order, zonelist, high_zoneidx,
                ALLOC_WMARK_HIGH|ALLOC_CPUSET,
-               preferred_zone, migratetype);
+               preferred_zone, migratetype, refault_distance);
        if (page)
                goto out;
 
@@ -2158,7 +2159,7 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, bool sync_migration,
+       int migratetype, unsigned long refault_distance, bool sync_migration,
        bool *contended_compaction, bool *deferred_compaction,
        unsigned long *did_some_progress)
 {
@@ -2186,7 +2187,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                page = get_page_from_freelist(gfp_mask, nodemask,
                                order, zonelist, high_zoneidx,
                                alloc_flags & ~ALLOC_NO_WATERMARKS,
-                               preferred_zone, migratetype);
+                               preferred_zone, migratetype, refault_distance);
                if (page) {
                        preferred_zone->compact_blockskip_flush = false;
                        preferred_zone->compact_considered = 0;
@@ -2221,7 +2222,7 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, bool sync_migration,
+       int migratetype, unsigned long refault_distance, bool sync_migration,
        bool *contended_compaction, bool *deferred_compaction,
        unsigned long *did_some_progress)
 {
@@ -2262,7 +2263,8 @@ static inline struct page *
 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-       int migratetype, unsigned long *did_some_progress)
+       int migratetype, unsigned long refault_distance,
+       unsigned long *did_some_progress)
 {
        struct page *page = NULL;
        bool drained = false;
@@ -2278,9 +2280,9 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 retry:
        page = get_page_from_freelist(gfp_mask, nodemask, order,
-                                       zonelist, high_zoneidx,
-                                       alloc_flags & ~ALLOC_NO_WATERMARKS,
-                                       preferred_zone, migratetype);
+                               zonelist, high_zoneidx,
+                               alloc_flags & ~ALLOC_NO_WATERMARKS,
+                               preferred_zone, migratetype, refault_distance);
 
        /*
         * If an allocation failed after direct reclaim, it could be because
@@ -2303,14 +2305,14 @@ static inline struct page *
 __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, struct zone *preferred_zone,
-       int migratetype)
+       int migratetype, unsigned long refault_distance)
 {
        struct page *page;
 
        do {
                page = get_page_from_freelist(gfp_mask, nodemask, order,
                        zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
-                       preferred_zone, migratetype);
+                       preferred_zone, migratetype, refault_distance);
 
                if (!page && gfp_mask & __GFP_NOFAIL)
                        wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
@@ -2391,7 +2393,7 @@ static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, struct zone *preferred_zone,
-       int migratetype)
+       int migratetype, unsigned long refault_distance)
 {
        const gfp_t wait = gfp_mask & __GFP_WAIT;
        struct page *page = NULL;
@@ -2449,7 +2451,7 @@ rebalance:
        /* This is the last chance, in general, before the goto nopage. */
        page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                        high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
-                       preferred_zone, migratetype);
+                       preferred_zone, migratetype, refault_distance);
        if (page)
                goto got_pg;
 
@@ -2464,7 +2466,8 @@ rebalance:
 
                page = __alloc_pages_high_priority(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
-                               preferred_zone, migratetype);
+                               preferred_zone, migratetype,
+                               refault_distance);
                if (page) {
                        goto got_pg;
                }
@@ -2490,7 +2493,8 @@ rebalance:
                                        zonelist, high_zoneidx,
                                        nodemask,
                                        alloc_flags, preferred_zone,
-                                       migratetype, sync_migration,
+                                       migratetype, refault_distance,
+                                       sync_migration,
                                        &contended_compaction,
                                        &deferred_compaction,
                                        &did_some_progress);
@@ -2513,7 +2517,8 @@ rebalance:
                                        zonelist, high_zoneidx,
                                        nodemask,
                                        alloc_flags, preferred_zone,
-                                       migratetype, &did_some_progress);
+                                       migratetype, refault_distance,
+                                       &did_some_progress);
        if (page)
                goto got_pg;
 
@@ -2532,7 +2537,7 @@ rebalance:
                        page = __alloc_pages_may_oom(gfp_mask, order,
                                        zonelist, high_zoneidx,
                                        nodemask, preferred_zone,
-                                       migratetype);
+                                       migratetype, refault_distance);
                        if (page)
                                goto got_pg;
 
@@ -2575,7 +2580,8 @@ rebalance:
                                        zonelist, high_zoneidx,
                                        nodemask,
                                        alloc_flags, preferred_zone,
-                                       migratetype, sync_migration,
+                                       migratetype, refault_distance,
+                                       sync_migration,
                                        &contended_compaction,
                                        &deferred_compaction,
                                        &did_some_progress);
@@ -2598,7 +2604,8 @@ got_pg:
  */
 struct page *
 __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-                       struct zonelist *zonelist, nodemask_t *nodemask)
+                      struct zonelist *zonelist, nodemask_t *nodemask,
+                      unsigned long refault_distance)
 {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        struct zone *preferred_zone;
@@ -2649,7 +2656,7 @@ retry_cpuset:
        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, alloc_flags,
-                       preferred_zone, migratetype);
+                       preferred_zone, migratetype, refault_distance);
        if (unlikely(!page)) {
                /*
                 * Runtime PM, block IO and its error handling path
@@ -2659,7 +2666,7 @@ retry_cpuset:
                gfp_mask = memalloc_noio_flags(gfp_mask);
                page = __alloc_pages_slowpath(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
-                               preferred_zone, migratetype);
+                               preferred_zone, migratetype, refault_distance);
        }
 
        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
diff --git a/mm/readahead.c b/mm/readahead.c
index 29efd45..1ff6104 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
 #include <linux/export.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -172,6 +173,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
         */
        for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
                pgoff_t page_offset = offset + page_idx;
+               unsigned long distance;
 
                if (page_offset > end_index)
                        break;
@@ -181,8 +183,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
                rcu_read_unlock();
                if (page && !radix_tree_exceptional_entry(page))
                        continue;
-
-               page = page_cache_alloc_readahead(mapping);
+               distance = workingset_refault_distance(page);
+               page = page_cache_alloc_readahead(mapping, distance);
                if (!page)
                        break;
                page->index = page_offset;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 92866be..fabdc16 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -32,7 +32,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
        struct page *page;
 
        if (!pl->num_pages_free) {
-               page = __page_cache_alloc(GFP_NOFS);
+               page = __page_cache_alloc(GFP_NOFS, 0);
        } else {
                page = list_first_entry(&pl->free_list, struct page, lru);
                list_del(&page->lru);
@@ -83,7 +83,7 @@ int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
        space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */
 
        while (space > pl->num_pages_free) {
-               struct page *page = __page_cache_alloc(GFP_NOFS);
+               struct page *page = __page_cache_alloc(GFP_NOFS, 0);
                if (!page)
                        return -ENOMEM;
                list_add_tail(&page->lru, &pl->free_list);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a224..b1151f4 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -79,7 +79,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
        if (!pages)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < num_pages; i++) {
-               pages[i] = __page_cache_alloc(flags);
+               pages[i] = __page_cache_alloc(flags, 0);
                if (pages[i] == NULL) {
                        ceph_release_page_vector(pages, i);
                        return ERR_PTR(-ENOMEM);
-- 
1.8.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to