Let's refactor that code. We want to check if we can offline memory
blocks. Add a new function is_mem_section_offlineable() for that and
make it call is_mem_section_offlineable() for each contained section.
Within is_mem_section_offlineable(), add some more sanity checks and
directly bail out if the section contains holes or if it spans multiple
zones.

The old code was inherently racy with concurrent offlining/memory
unplug. Let's avoid that and grab the device_hotplug_lock. Luckily
we are already holding it when calling from powerpc code.

Note1: If somebody wants to export this function for use in driver code, we
need a variant that takes the device_hotplug_lock()

Note2: If we could have a zombie device (not clear yet), the present
section checks would properly bail out early.

Note3: I'd prefer the mem_hotplug_lock in read, but as we are about to
change the locking on the removal path (IOW, don't hold it when removing
memory block devices), I do not want to go down that path.

Note4: For now we would have returned "removable" although we would
block offlining due to memory holes, multiple zones, or missing
sections.

Tested with DIMMs on x86-64. Compile-tested on Power.

Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <raf...@kernel.org>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Leonardo Bras <leona...@linux.ibm.com>
Cc: Nathan Lynch <nath...@linux.ibm.com>
Cc: Allison Randal <alli...@lohutok.net>
Cc: Nathan Fontenot <nf...@linux.vnet.ibm.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Michal Hocko <mho...@suse.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Stephen Rothwell <s...@canb.auug.org.au>
Cc: Anshuman Khandual <anshuman.khand...@arm.com>
Cc: lantianyu1...@gmail.com
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: David Hildenbrand <da...@redhat.com>
---
 .../platforms/pseries/hotplug-memory.c        | 24 ++-----
 drivers/base/memory.c                         | 37 ++++++----
 include/linux/memory.h                        |  1 +
 include/linux/memory_hotplug.h                |  5 +-
 mm/memory_hotplug.c                           | 68 +++++++++----------
 5 files changed, 67 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c126b94d1943..8d80159465e4 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -337,34 +337,24 @@ static int pseries_remove_mem_node(struct device_node *np)
 
 static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
-       int i, scns_per_block;
-       bool rc = true;
-       unsigned long pfn, block_sz;
-       u64 phys_addr;
+       struct memory_block *mem;
+       bool rc = false;
 
        if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
                return false;
 
-       block_sz = memory_block_size_bytes();
-       scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-       phys_addr = lmb->base_addr;
-
 #ifdef CONFIG_FA_DUMP
        /*
         * Don't hot-remove memory that falls in fadump boot memory area
         * and memory that is reserved for capturing old kernel memory.
         */
-       if (is_fadump_memory_area(phys_addr, block_sz))
+       if (is_fadump_memory_area(lmb->base_addr,  memory_block_size_bytes()))
                return false;
 #endif
-
-       for (i = 0; i < scns_per_block; i++) {
-               pfn = PFN_DOWN(phys_addr);
-               if (!pfn_present(pfn))
-                       continue;
-
-               rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION);
-               phys_addr += MIN_MEMORY_BLOCK_SIZE;
+       mem = lmb_to_memblock(lmb);
+       if (mem) {
+               rc = is_memory_block_offlineable(mem);
+               put_device(&mem->dev);
        }
 
        return rc;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c6d288fad493..f744250c34d0 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -104,6 +104,25 @@ static ssize_t phys_index_show(struct device *dev,
        return sprintf(buf, "%08lx\n", phys_index);
 }
 
+/*
+ * Test if a memory block is likely to be offlineable. Returns true if
+ * the block is already offline.
+ *
+ * Called under device_hotplug_lock.
+ */
+bool is_memory_block_offlineable(struct memory_block *mem)
+{
+       int i;
+
+       if (mem->state != MEM_ONLINE)
+               return true;
+
+       for (i = 0; i < sections_per_block; i++)
+               if (!is_mem_section_offlineable(mem->start_section_nr + i))
+                       return false;
+       return true;
+}
+
 /*
  * Show whether the memory block is likely to be offlineable (or is already
  * offline). Once offline, the memory block could be removed. The return
@@ -114,20 +133,14 @@ static ssize_t removable_show(struct device *dev, struct 
device_attribute *attr,
                              char *buf)
 {
        struct memory_block *mem = to_memory_block(dev);
-       unsigned long pfn;
-       int ret = 1, i;
-
-       if (mem->state != MEM_ONLINE)
-               goto out;
+       int ret;
 
-       for (i = 0; i < sections_per_block; i++) {
-               if (!present_section_nr(mem->start_section_nr + i))
-                       continue;
-               pfn = section_nr_to_pfn(mem->start_section_nr + i);
-               ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
-       }
+       ret = lock_device_hotplug_sysfs();
+       if (ret)
+               return ret;
+       ret = is_memory_block_offlineable(mem);
+       unlock_device_hotplug();
 
-out:
        return sprintf(buf, "%d\n", ret);
 }
 
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 0b8d791b6669..faf03eb64ecc 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -91,6 +91,7 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block 
*, void *);
 extern int walk_memory_blocks(unsigned long start, unsigned long size,
                              void *arg, walk_memory_blocks_func_t func);
 extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
+extern bool is_memory_block_offlineable(struct memory_block *mem);
 #define CONFIG_MEM_BLOCK_SIZE  (PAGES_PER_SECTION<<PAGE_SHIFT)
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f4d59155f3d4..8d087772f748 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -306,15 +306,14 @@ static inline void pgdat_resize_init(struct pglist_data 
*pgdat) {}
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
-extern bool is_mem_section_removable(unsigned long pfn, unsigned long 
nr_pages);
+extern bool is_mem_section_offlineable(unsigned long nr);
 extern void try_offline_node(int nid);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern int remove_memory(int nid, u64 start, u64 size);
 extern void __remove_memory(int nid, u64 start, u64 size);
 
 #else
-static inline bool is_mem_section_removable(unsigned long pfn,
-                                       unsigned long nr_pages)
+static inline bool is_mem_section_offlineable(unsigned long nr)
 {
        return false;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7a6de9b0dcab..a6d14d2b7f0c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1128,46 +1128,51 @@ static unsigned long next_active_pageblock(unsigned 
long pfn)
        return pfn + pageblock_nr_pages;
 }
 
-static bool is_pageblock_removable_nolock(unsigned long pfn)
+static int count_system_ram_pages_cb(unsigned long start_pfn,
+                                    unsigned long nr_pages, void *data)
 {
-       struct page *page = pfn_to_page(pfn);
+       unsigned long *nr_system_ram_pages = data;
+
+       *nr_system_ram_pages += nr_pages;
+       return 0;
+}
+
+/*
+ * Check if a section is likely to be offlineable.
+ *
+ * Called with device_hotplug_lock.
+ */
+bool is_mem_section_offlineable(unsigned long nr)
+{
+       const unsigned long start_pfn = section_nr_to_pfn(nr);
+       const unsigned long end_pfn = start_pfn + PAGES_PER_SECTION;
+       unsigned long pfn, nr_pages = 0;
        struct zone *zone;
 
-       /*
-        * We have to be careful here because we are iterating over memory
-        * sections which are not zone aware so we might end up outside of
-        * the zone but still within the section.
-        * We have to take care about the node as well. If the node is offline
-        * its NODE_DATA will be NULL - see page_zone.
-        */
-       if (!node_online(page_to_nid(page)))
+       if (!present_section_nr(nr))
                return false;
-
-       zone = page_zone(page);
-       pfn = page_to_pfn(page);
-       if (!zone_spans_pfn(zone, pfn))
+       if (!online_section_nr(nr))
                return false;
 
-       return !has_unmovable_pages(zone, page, MIGRATE_MOVABLE,
-                                   MEMORY_OFFLINE);
-}
-
-/* Checks if this range of memory is likely to be hot-removable. */
-bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
-{
-       unsigned long end_pfn, pfn;
+       /* we don't allow to offline sections with holes */
+       walk_system_ram_range(start_pfn, PAGES_PER_SECTION, &nr_pages,
+                             count_system_ram_pages_cb);
+       if (nr_pages != PAGES_PER_SECTION)
+               return false;
 
-       end_pfn = min(start_pfn + nr_pages,
-                       zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
+       /* we don't allow to offline sections with mixed zones/nodes */
+       zone = test_pages_in_a_zone(start_pfn, end_pfn);
+       if (!zone)
+               return false;
 
-       /* Check the starting page of each pageblock within the range */
+       /* check each pageblock if it contains unmovable pages */
        for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
-               if (!is_pageblock_removable_nolock(pfn))
+               if (has_unmovable_pages(zone, pfn_to_page(pfn), MIGRATE_MOVABLE,
+                                       MEMORY_OFFLINE))
                        return false;
                cond_resched();
        }
 
-       /* All pageblocks in the memory block are likely to be hot-removable */
        return true;
 }
 
@@ -1436,15 +1441,6 @@ static void node_states_clear_node(int node, struct 
memory_notify *arg)
                node_clear_state(node, N_MEMORY);
 }
 
-static int count_system_ram_pages_cb(unsigned long start_pfn,
-                                    unsigned long nr_pages, void *data)
-{
-       unsigned long *nr_system_ram_pages = data;
-
-       *nr_system_ram_pages += nr_pages;
-       return 0;
-}
-
 static int __ref __offline_pages(unsigned long start_pfn,
                  unsigned long end_pfn)
 {
-- 
2.24.1

Reply via email to