The race condition addressed in commit add05cecef80 ("mm: soft-offline: don't
free target page in successful page migration") was not closed completely,
because the race can happen not only with soft-offline, but also with hard-offline.
Consider that a slab page is about to be freed into buddy pool, and then an
uncorrected memory error hits the page just after entering __free_one_page(),
then VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP) is triggered,
despite the fact that it's not necessary because the data on the affected
page is not consumed.

To solve it, this patch drops __PG_HWPOISON from the page flag checks done at
allocation/free time. I think this is justified because the __PG_HWPOISON flag
is defined to prevent the page from being reused, so setting it outside the
page's alloc-free cycle is intended behavior (not a bug).

This patch also reverts most of the changes from commit add05cecef80 concerning
the new refcounting rule for soft-offlined pages, which is no longer necessary.

Signed-off-by: Naoya Horiguchi <n-horigu...@ah.jp.nec.com>
---
 include/linux/page-flags.h | 10 +++++++---
 mm/huge_memory.c           |  7 +------
 mm/memory-failure.c        |  6 +++++-
 mm/migrate.c               |  9 +++------
 mm/page_alloc.c            |  4 ++++
 5 files changed, 20 insertions(+), 16 deletions(-)

diff --git v4.2-rc2.orig/include/linux/page-flags.h 
v4.2-rc2/include/linux/page-flags.h
index f34e040b34e9..53400f101f2d 100644
--- v4.2-rc2.orig/include/linux/page-flags.h
+++ v4.2-rc2/include/linux/page-flags.h
@@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page 
*page)
         1 << PG_private | 1 << PG_private_2 | \
         1 << PG_writeback | 1 << PG_reserved | \
         1 << PG_slab    | 1 << PG_swapcache | 1 << PG_active | \
-        1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
+        1 << PG_unevictable | __PG_MLOCKED | \
         __PG_COMPOUND_LOCK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have any flags set.  It they are set,
+ * Pages being prepped should not have these flags set.  If they are set,
  * there has been a kernel bug or struct page corruption.
+ *
+ * __PG_HWPOISON is exceptional because it needs to be kept beyond the page's
+ * alloc-free cycle to prevent the page from being reused.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP       ((1 << NR_PAGEFLAGS) - 1)
+#define PAGE_FLAGS_CHECK_AT_PREP       \
+       (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
 
 #define PAGE_FLAGS_PRIVATE                             \
        (1 << PG_private | 1 << PG_private_2)
diff --git v4.2-rc2.orig/mm/huge_memory.c v4.2-rc2/mm/huge_memory.c
index c107094f79ba..097c7a4bfbd9 100644
--- v4.2-rc2.orig/mm/huge_memory.c
+++ v4.2-rc2/mm/huge_memory.c
@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page,
                /* after clearing PageTail the gup refcount can be released */
                smp_mb__after_atomic();
 
-               /*
-                * retain hwpoison flag of the poisoned tail page:
-                *   fix for the unsuitable process killed on Guest Machine(KVM)
-                *   by the memory-failure.
-                */
-               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
+               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
                page_tail->flags |= (page->flags &
                                     ((1L << PG_referenced) |
                                      (1L << PG_swapbacked) |
diff --git v4.2-rc2.orig/mm/memory-failure.c v4.2-rc2/mm/memory-failure.c
index 421d7c9b30f4..755f87e4ec64 100644
--- v4.2-rc2.orig/mm/memory-failure.c
+++ v4.2-rc2/mm/memory-failure.c
@@ -1723,6 +1723,9 @@ int soft_offline_page(struct page *page, int flags)
 
        get_online_mems();
 
+       if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
+               set_migratetype_isolate(page, true);
+
        ret = get_any_page(page, pfn, flags);
        put_online_mems();
        if (ret > 0) { /* for in-use pages */
@@ -1730,7 +1733,7 @@ int soft_offline_page(struct page *page, int flags)
                        ret = soft_offline_huge_page(page, flags);
                else
                        ret = __soft_offline_page(page, flags);
-       } else if (ret == 0) { /* for free pages */
+       } else if (ret == 0) {
                if (PageHuge(page)) {
                        set_page_hwpoison_huge_page(hpage);
                        if (!dequeue_hwpoisoned_huge_page(hpage))
@@ -1741,5 +1744,6 @@ int soft_offline_page(struct page *page, int flags)
                                atomic_long_inc(&num_poisoned_pages);
                }
        }
+       unset_migratetype_isolate(page, MIGRATE_MOVABLE);
        return ret;
 }
diff --git v4.2-rc2.orig/mm/migrate.c v4.2-rc2/mm/migrate.c
index ee401e4e5ef1..c37d5772767b 100644
--- v4.2-rc2.orig/mm/migrate.c
+++ v4.2-rc2/mm/migrate.c
@@ -918,8 +918,7 @@ static int __unmap_and_move(struct page *page, struct page 
*newpage,
 static ICE_noinline int unmap_and_move(new_page_t get_new_page,
                                   free_page_t put_new_page,
                                   unsigned long private, struct page *page,
-                                  int force, enum migrate_mode mode,
-                                  enum migrate_reason reason)
+                                  int force, enum migrate_mode mode)
 {
        int rc = 0;
        int *result = NULL;
@@ -950,8 +949,7 @@ static ICE_noinline int unmap_and_move(new_page_t 
get_new_page,
                list_del(&page->lru);
                dec_zone_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               if (reason != MR_MEMORY_FAILURE)
-                       putback_lru_page(page);
+               putback_lru_page(page);
        }
 
        /*
@@ -1124,8 +1122,7 @@ int migrate_pages(struct list_head *from, new_page_t 
get_new_page,
                                                pass > 2, mode);
                        else
                                rc = unmap_and_move(get_new_page, put_new_page,
-                                               private, page, pass > 2, mode,
-                                               reason);
+                                               private, page, pass > 2, mode);
 
                        switch(rc) {
                        case -ENOMEM:
diff --git v4.2-rc2.orig/mm/page_alloc.c v4.2-rc2/mm/page_alloc.c
index 506eac8b38af..e32d58ce5d2f 100644
--- v4.2-rc2.orig/mm/page_alloc.c
+++ v4.2-rc2/mm/page_alloc.c
@@ -1287,6 +1287,10 @@ static inline int check_new_page(struct page *page)
                bad_reason = "non-NULL mapping";
        if (unlikely(atomic_read(&page->_count) != 0))
                bad_reason = "nonzero _count";
+       if (unlikely(page->flags & __PG_HWPOISON)) {
+               bad_reason = "HWPoisoned (hardware-corrupted)";
+               bad_flags = __PG_HWPOISON;
+       }
        if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
                bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
                bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
-- 
2.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to