The memory that was set aside in the previous patch now needs to be
reinserted.  We start this process from a late_initcall so that multiple
cpus are available to do the work in parallel.

Signed-off-by: Mike Travis <tra...@sgi.com>
Signed-off-by: Nathan Zimmer <nzim...@sgi.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: Andrew Morton <a...@linux-foundation.org> 
Cc: Yinghai Lu <ying...@kernel.org>
---
 arch/x86/kernel/e820.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/base/memory.c  |  83 +++++++++++++++++++++++++++++++
 include/linux/memory.h |   5 ++
 3 files changed, 217 insertions(+)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3752dc5..d31039d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -23,6 +23,7 @@
 
 #ifdef CONFIG_DELAY_MEM_INIT
 #include <linux/memory.h>
+#include <linux/delay.h>
 #endif
 
 #include <asm/e820.h>
@@ -397,6 +398,28 @@ static u64 min_region_size;	/* min size of region to slice from */
 static u64 pre_region_size;	/* multiply bsize for node low memory */
 static u64 post_region_size;	/* multiply bsize for node high memory */
 
+/* wall-clock bounds of the absent-memory insertion, for reporting */
+static unsigned long add_absent_work_start_time;
+static unsigned long add_absent_work_stop_time;
+static unsigned int add_absent_job_count;	/* NOTE(review): not used in this patch */
+/* number of scheduled work items that have not yet completed */
+static atomic_t add_absent_work_count;
+
+/*
+ * One per-cpu work item per absent e820 chunk.  'busy' is 0 while the
+ * work is pending or running and 2 once it has completed.
+ */
+struct absent_work {
+	struct work_struct	work;
+	struct absent_work	*next;
+	atomic_t		busy;
+	int			cpu;
+	int			node;
+	int			index;
+};
+static DEFINE_PER_CPU(struct absent_work, absent_work);
+static struct absent_work *first_absent_work;
+
 static int __init setup_delay_mem_init(char *str)
 {
 	int bbits, mpnbits, minmult, premult, postmult;
@@ -527,6 +544,125 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 	}
 	return ret;
 }
+
+/* Assign a cpu for this memory chunk and get the per_cpu absent_work struct */
+static struct absent_work *get_absent_work(int node)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpumask_of_node(node)) {
+		struct absent_work *aws = &per_cpu(absent_work, cpu);
+
+		/* NUMA_NO_NODE marks a cpu not yet claimed for a chunk */
+		if (aws->node != NUMA_NO_NODE)
+			continue;
+		aws->cpu = cpu;
+		aws->node = node;
+		return aws;
+	}
+
+	/* (if this becomes a problem, we can use a cpu on another node) */
+	pr_crit("e820: No CPU on Node %d to schedule absent_work\n", node);
+	return NULL;
+}
+
+/* Count of 'not done' processes */
+static int count_absent_work_notdone(void)
+{
+	struct absent_work *aws;
+	int notdone = 0;
+
+	for (aws = first_absent_work; aws; aws = aws->next)
+		if (atomic_read(&aws->busy) < 2)
+			notdone++;
+
+	return notdone;
+}
+
+/* The absent_work thread */
+static void add_absent_memory_work(struct work_struct *work)
+{
+	struct absent_work *aws;
+	u64 phys_addr, size;
+	int ret;
+
+	aws = container_of(work, struct absent_work, work);
+
+	phys_addr = e820_absent.map[aws->index].addr;
+	size = e820_absent.map[aws->index].size;
+	ret = memory_add_absent(aws->node, phys_addr, size);
+	if (ret)
+		pr_crit("e820: Error %d adding absent memory %llx %llx (%d)\n",
+			ret, phys_addr, size, aws->node);
+
+	atomic_set(&aws->busy, 2);
+	atomic_dec(&add_absent_work_count);
+
+	/* if no one is waiting, then snap stop time */
+	if (!count_absent_work_notdone())
+		add_absent_work_stop_time = get_seconds();
+}
+
+/* Initialize absent_work threads */
+static int add_absent_memory(void)
+{
+	struct absent_work *prev = NULL;
+	int cpu, i;
+
+	add_absent_work_start_time = get_seconds();
+	add_absent_work_stop_time = 0;
+	atomic_set(&add_absent_work_count, 0);
+	first_absent_work = NULL;
+
+	/* mark every per_cpu slot free; 0 is a valid node id */
+	for_each_online_cpu(cpu) {
+		struct absent_work *aws = &per_cpu(absent_work, cpu);
+
+		aws->node = NUMA_NO_NODE;
+		aws->next = NULL;
+	}
+
+	/* setup each work thread */
+	for (i = 0; i < e820_absent.nr_map; i++) {
+		u64 phys_addr = e820_absent.map[i].addr;
+		int node = memory_add_physaddr_to_nid(phys_addr);
+		struct absent_work *aws;
+
+		if (!node_online(node))
+			continue;
+
+		aws = get_absent_work(node);
+		if (!aws)
+			continue;
+
+		/* link into the list walked by count_absent_work_notdone() */
+		if (prev)
+			prev->next = aws;
+		else
+			first_absent_work = aws;
+		prev = aws;
+
+		INIT_WORK(&aws->work, add_absent_memory_work);
+		atomic_set(&aws->busy, 0);
+		aws->index = i;
+		atomic_inc(&add_absent_work_count);
+
+		/* schedule absent_work thread */
+		if (!schedule_work_on(aws->cpu, &aws->work))
+			BUG();
+	}
+
+	pr_info("e820: Add absent memory started\n");
+
+	return 0;
+}
+
+/* Called during bootup to start adding absent_mem early */
+static int absent_memory_init(void)
+{
+	return add_absent_memory();
+}
+late_initcall(absent_memory_init);
 #endif /* CONFIG_DELAY_MEM_INIT */
 
 static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 14f8a69..5b4245a 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -442,6 +442,90 @@ static inline int memory_probe_init(void)
 }
 #endif
 
+#ifdef CONFIG_DELAY_MEM_INIT
+/*
+ * Find the memory_block covering phys_addr.  last_mem_blk is a search
+ * hint (NULL to start from the beginning).  Returns NULL when the
+ * section is not present.
+ */
+static struct memory_block *memory_get_block(u64 phys_addr,
+					struct memory_block *last_mem_blk)
+{
+	unsigned long pfn = phys_addr >> PAGE_SHIFT;
+	unsigned long section_nr = pfn_to_section_nr(pfn);
+	struct mem_section *mem_sect;
+
+	if (!present_section_nr(section_nr))
+		return NULL;
+
+	mem_sect = __nr_to_section(section_nr);
+	return find_memory_block_hinted(mem_sect, last_mem_blk);
+}
+
+/*
+ * Add and online a chunk of absent memory on node nid.
+ * phys_addr and size must be aligned on memory_block_size boundaries.
+ * Returns 0 on success or a negative errno; on failure after
+ * add_memory() succeeds, the memory remains added but (partly) offline.
+ */
+int memory_add_absent(int nid, u64 phys_addr, u64 size)
+{
+	struct memory_block *mem = NULL;
+	struct page *first_page;
+	unsigned long block_sz;
+	unsigned long nr_pages;
+	unsigned long start_pfn;
+	int ret;
+
+	block_sz = get_memory_block_size();
+	if (phys_addr & (block_sz - 1) || size & (block_sz - 1))
+		return -EINVAL;
+
+	/* memory already present? */
+	if (memory_get_block(phys_addr, NULL))
+		return -EBUSY;
+
+	ret = add_memory(nid, phys_addr, size);
+	if (ret)
+		return ret;
+
+	/* grab first block to use for onlining process */
+	mem = memory_get_block(phys_addr, NULL);
+	if (!mem)
+		return -ENOMEM;
+
+	first_page = pfn_to_page(mem->start_section_nr << PFN_SECTION_SHIFT);
+	start_pfn = page_to_pfn(first_page);
+	nr_pages = size >> PAGE_SHIFT;
+
+	ret = online_pages(start_pfn, nr_pages, ONLINE_KEEP);
+	if (ret)
+		return ret;
+
+	/* mark each block online and emit the hotplug uevent */
+	for (;;) {
+		mutex_lock(&mem->state_mutex);
+		if (mem->state == MEM_OFFLINE) {
+			mem->state = MEM_ONLINE;
+			kobject_uevent(&mem->dev.kobj, KOBJ_ONLINE);
+		}
+		mutex_unlock(&mem->state_mutex);
+
+		phys_addr += block_sz;
+		size -= block_sz;
+		if (!size)
+			break;
+
+		mem = memory_get_block(phys_addr, mem);
+		if (!mem) {
+			pr_err("memory_get_block failed at %llx\n", phys_addr);
+			return -EFAULT;
+		}
+	}
+	return 0;
+}
+#endif /* CONFIG_DELAY_MEM_INIT */
+
 #ifdef CONFIG_MEMORY_FAILURE
 /*
  * Support for offlining pages of memory
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 85c31a8..a000c54 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -128,6 +128,10 @@ extern struct memory_block *find_memory_block(struct mem_section *);
 enum mem_add_context { BOOT, HOTPLUG };
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
+#ifdef CONFIG_DELAY_MEM_INIT
+extern int memory_add_absent(int nid, u64 phys_addr, u64 size);
+#endif
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 #define hotplug_memory_notifier(fn, pri) ({            \
 	static __meminitdata struct notifier_block fn##_mem_nb =\
-- 
1.8.2.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to