This is the simplest possible policy that still does something of note.
When a pte_numa is faulted, it is moved immediately. Any replacement
policy must at least do better than this and in all likelihood this
policy regresses normal workloads.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/uapi/linux/mempolicy.h |    1 +
 mm/mempolicy.c                 |   38 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 16fb4e6..0d11c3d 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -67,6 +67,7 @@ enum mpol_rebind_step {
 #define MPOL_F_LOCAL   (1 << 1)        /* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)      /* identify policies in rebinding */
 #define MPOL_F_MOF     (1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_MORON   (1 << 4) /* Migrate On pte_numa Reference On Node */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 045714d..bcaa4fe 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -118,6 +118,26 @@ static struct mempolicy default_policy = {
        .flags = MPOL_F_LOCAL,
 };
 
+static struct mempolicy preferred_node_policy[MAX_NUMNODES];
+
+static struct mempolicy *get_task_policy(struct task_struct *p)
+{
+       struct mempolicy *pol = p->mempolicy;
+       int node;
+
+       if (!pol) {
+               node = numa_node_id();
+               if (node != -1)
+                       pol = &preferred_node_policy[node];
+
+               /* preferred_node_policy is not initialised early in boot */
+               if (!pol->mode)
+                       pol = NULL;
+       }
+
+       return pol;
+}
+
 static const struct mempolicy_operations {
        int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
        /*
@@ -1706,7 +1726,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
 struct mempolicy *get_vma_policy(struct task_struct *task,
                struct vm_area_struct *vma, unsigned long addr)
 {
-       struct mempolicy *pol = task->mempolicy;
+       struct mempolicy *pol = get_task_policy(task);
 
        if (vma) {
                if (vma->vm_ops && vma->vm_ops->get_policy) {
@@ -2129,7 +2149,7 @@ retry_cpuset:
  */
 struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
-       struct mempolicy *pol = current->mempolicy;
+       struct mempolicy *pol = get_task_policy(current);
        struct page *page;
        unsigned int cpuset_mems_cookie;
 
@@ -2403,6 +2423,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
        default:
                BUG();
        }
+
+       /* Migrate the page towards the node whose CPU is referencing it */
+       if (pol->flags & MPOL_F_MORON)
+               polnid = numa_node_id();
+
        if (curnid != polnid)
                ret = polnid;
 out:
@@ -2591,6 +2616,15 @@ void __init numa_policy_init(void)
                                     sizeof(struct sp_node),
                                     0, SLAB_PANIC, NULL);
 
+       for_each_node(nid) {
+               preferred_node_policy[nid] = (struct mempolicy) {
+                       .refcnt = ATOMIC_INIT(1),
+                       .mode = MPOL_PREFERRED,
+                       .flags = MPOL_F_MOF | MPOL_F_MORON,
+                       .v = { .preferred_node = nid, },
+               };
+       }
+
        /*
         * Set interleaving policy for system init. Interleaving is only
         * enabled across suitably sized nodes (default is >= 16MB), or
-- 
1.7.9.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to