{un}lock_memory_hotplug is used to synchronize against memory hotplug.
Currently it is backed by a mutex, which makes it a bit of a hammer -
threads that only want to get a stable value of the online nodes mask
won't be able to proceed concurrently.

This patch fixes this by turning mem_hotplug_mutex into an rw semaphore
so that lock_memory_hotplug only takes it for reading while the memory
hotplug code locks it for writing. This allows code under
{un}lock_memory_hotplug to run concurrently while still guaranteeing
protection against memory hotplug.

Signed-off-by: Vladimir Davydov <vdavy...@parallels.com>
---
 include/linux/mmzone.h |    7 +++--
 mm/memory_hotplug.c    |   70 +++++++++++++++++++++---------------------------
 mm/vmscan.c            |    2 +-
 3 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fac5509c18f0..586cc2d2c25e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -481,9 +481,8 @@ struct zone {
         * give them a chance of being in the same cacheline.
         *
         * Write access to present_pages at runtime should be protected by
-        * lock_memory_hotplug()/unlock_memory_hotplug().  Any reader who can't
-        * tolerant drift of present_pages should hold memory hotplug lock to
-        * get a stable value.
+        * mem_hotplug_sem. Any reader who can't tolerate drift of
+        * present_pages should take it for reading to get a stable value.
         *
         * Read access to managed_pages should be safe because it's unsigned
         * long. Write access to zone->managed_pages and totalram_pages are
@@ -766,7 +765,7 @@ typedef struct pglist_data {
        nodemask_t reclaim_nodes;       /* Nodes allowed to reclaim from */
        wait_queue_head_t kswapd_wait;
        wait_queue_head_t pfmemalloc_wait;
-       struct task_struct *kswapd;     /* Protected by lock_memory_hotplug() */
+       struct task_struct *kswapd;     /* Protected by mem_hotplug_sem */
        int kswapd_max_order;
        enum zone_type classzone_idx;
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a650db29606f..052db2e72035 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
 #include <linux/stop_machine.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
+#include <linux/rwsem.h>
 
 #include <asm/tlbflush.h>
 
@@ -47,16 +48,16 @@ static void generic_online_page(struct page *page);
 
 static online_page_callback_t online_page_callback = generic_online_page;
 
-DEFINE_MUTEX(mem_hotplug_mutex);
+static DECLARE_RWSEM(mem_hotplug_sem);
 
 void lock_memory_hotplug(void)
 {
-       mutex_lock(&mem_hotplug_mutex);
+       down_read(&mem_hotplug_sem);
 }
 
 void unlock_memory_hotplug(void)
 {
-       mutex_unlock(&mem_hotplug_mutex);
+       up_read(&mem_hotplug_sem);
 }
 
 
@@ -727,14 +728,14 @@ int set_online_page_callback(online_page_callback_t callback)
 {
        int rc = -EINVAL;
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
 
        if (online_page_callback == generic_online_page) {
                online_page_callback = callback;
                rc = 0;
        }
 
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
 
        return rc;
 }
@@ -744,14 +745,14 @@ int restore_online_page_callback(online_page_callback_t callback)
 {
        int rc = -EINVAL;
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
 
        if (online_page_callback == callback) {
                online_page_callback = generic_online_page;
                rc = 0;
        }
 
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
 
        return rc;
 }
@@ -899,7 +900,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        int ret;
        struct memory_notify arg;
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
        /*
         * This doesn't need a lock to do pfn_to_page().
         * The section can't be removed here because of the
@@ -907,23 +908,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
         */
        zone = page_zone(pfn_to_page(pfn));
 
+       ret = -EINVAL;
        if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
-           !can_online_high_movable(zone)) {
-               unlock_memory_hotplug();
-               return -EINVAL;
-       }
+           !can_online_high_movable(zone))
+               goto out;
 
        if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
-               if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
-                       unlock_memory_hotplug();
-                       return -EINVAL;
-               }
+               if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
+                       goto out;
        }
        if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
-               if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
-                       unlock_memory_hotplug();
-                       return -EINVAL;
-               }
+               if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
+                       goto out;
        }
 
        /* Previous code may changed the zone of the pfn range */
@@ -939,8 +935,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        ret = notifier_to_errno(ret);
        if (ret) {
                memory_notify(MEM_CANCEL_ONLINE, &arg);
-               unlock_memory_hotplug();
-               return ret;
+               goto out;
        }
        /*
         * If this zone is not populated, then it is not in zonelist.
@@ -964,8 +959,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
                       (((unsigned long long) pfn + nr_pages)
                            << PAGE_SHIFT) - 1);
                memory_notify(MEM_CANCEL_ONLINE, &arg);
-               unlock_memory_hotplug();
-               return ret;
+               goto out;
        }
 
        zone->present_pages += onlined_pages;
@@ -995,9 +989,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 
        if (onlined_pages)
                memory_notify(MEM_ONLINE, &arg);
-       unlock_memory_hotplug();
-
-       return 0;
+out:
+       up_write(&mem_hotplug_sem);
+       return ret;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
@@ -1055,7 +1049,7 @@ int try_online_node(int nid)
        if (node_online(nid))
                return 0;
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
        pgdat = hotadd_new_pgdat(nid, 0);
        if (!pgdat) {
                pr_err("Cannot online node %d due to NULL pgdat\n", nid);
@@ -1073,7 +1067,7 @@ int try_online_node(int nid)
        }
 
 out:
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
        return ret;
 }
 
@@ -1117,7 +1111,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
                new_pgdat = !p;
        }
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
 
        new_node = !node_online(nid);
        if (new_node) {
@@ -1158,7 +1152,7 @@ error:
        release_memory_resource(res);
 
 out:
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
        return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
@@ -1565,7 +1559,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
        if (!test_pages_in_a_zone(start_pfn, end_pfn))
                return -EINVAL;
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
 
        zone = page_zone(pfn_to_page(start_pfn));
        node = zone_to_nid(zone);
@@ -1672,7 +1666,7 @@ repeat:
        writeback_set_ratelimit();
 
        memory_notify(MEM_OFFLINE, &arg);
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
        return 0;
 
 failed_removal:
@@ -1684,7 +1678,7 @@ failed_removal:
        undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 
 out:
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
        return ret;
 }
 
@@ -1888,7 +1882,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 
        BUG_ON(check_hotplug_memory_range(start, size));
 
-       lock_memory_hotplug();
+       down_write(&mem_hotplug_sem);
 
        /*
         * All memory blocks must be offlined before removing memory.  Check
@@ -1897,10 +1891,8 @@ void __ref remove_memory(int nid, u64 start, u64 size)
         */
        ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
                                check_memblock_offlined_cb);
-       if (ret) {
-               unlock_memory_hotplug();
+       if (ret)
                BUG();
-       }
 
        /* remove memmap entry */
        firmware_map_remove(start, start + size, "System RAM");
@@ -1909,7 +1901,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 
        try_offline_node(nid);
 
-       unlock_memory_hotplug();
+       up_write(&mem_hotplug_sem);
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 06879ead7380..b6fe28a5dcd1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3418,7 +3418,7 @@ int kswapd_run(int nid)
 
 /*
  * Called by memory hotplug when all memory in a node is offlined.  Caller must
- * hold lock_memory_hotplug().
+ * hold mem_hotplug_sem for writing.
  */
 void kswapd_stop(int nid)
 {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to