This patch introduces three ways to resolve quasi-hwpoisoned pages:
 1. unpoison: this is a testing feature, but if users accept the data loss
    (and then continue by rereading the old data from storage), it can be
    tolerable.
 2. truncate: if discarding the part of a file that contains the memory error
    is acceptable for your application, this can be reasonable too.
 3. full page overwrite: if your application is prepared for dirty pagecache
    errors and has a copy of the data (or can recreate the proper data), it
    can overwrite the page-sized address range covering the error and
    continue to run without caring about the error (a userspace sketch of
    this case follows below).
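
As an illustration of case 3, here is a minimal userspace sketch (hypothetical,
not part of this patch). It assumes the application still holds a correct copy
of the affected page in good_copy and that the error was reported for file
offset err_off; it rewrites the whole page-sized, page-aligned range covering
the error so that the quasi-hwpoisoned pagecache page can be resolved:

  #include <stdio.h>
  #include <sys/types.h>
  #include <unistd.h>

  /*
   * Hypothetical sketch: resolve a quasi-hwpoisoned page by overwriting
   * the full page that contains the reported error offset.  A partial
   * overwrite of the hwpoisoned page would still fail with EHWPOISON,
   * so the whole page must be rewritten in one write.
   */
  static int overwrite_error_page(int fd, off_t err_off, const char *good_copy)
  {
          long pagesize = sysconf(_SC_PAGESIZE);
          off_t page_start = err_off & ~((off_t)pagesize - 1);
          ssize_t ret;

          /* write exactly one page, page-aligned, from the known-good copy */
          ret = pwrite(fd, good_copy, pagesize, page_start);
          if (ret != pagesize) {
                  perror("pwrite");
                  return -1;
          }
          return 0;
  }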

Signed-off-by: Naoya Horiguchi <n-horigu...@ah.jp.nec.com>
---
 include/linux/pagemap.h | 16 +++++++++++++
 mm/filemap.c            | 14 ++++++++---
 mm/memory-failure.c     | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/truncate.c           |  7 ++++++
 4 files changed, 95 insertions(+), 4 deletions(-)

diff --git v3.14-rc6.orig/include/linux/pagemap.h v3.14-rc6/include/linux/pagemap.h
index 5e234d0d0baf..715962f7ea7a 100644
--- v3.14-rc6.orig/include/linux/pagemap.h
+++ v3.14-rc6/include/linux/pagemap.h
@@ -589,12 +589,28 @@ static inline int add_to_page_cache(struct page *page,
 #ifdef CONFIG_MEMORY_FAILURE
 bool mapping_hwpoisoned_range(struct address_space *mapping,
                                loff_t start_byte, loff_t end_byte);
+bool page_quasi_hwpoisoned(struct address_space *mapping, struct page *page);
+void hwpoison_resolve_pagecache_error(struct address_space *mapping,
+                               struct page *page, bool free);
+bool hwpoison_partial_overwrite(struct address_space *mapping,
+                               loff_t pos, size_t count);
 #else
 static inline bool mapping_hwpoisoned_range(struct address_space *mapping,
                                loff_t start_byte, loff_t end_byte)
 {
        return false;
 }
+static inline bool page_quasi_hwpoisoned(struct address_space *mapping,
+                                       struct page *page)
+{
+       return false;
+}
+#define hwpoison_resolve_pagecache_error(mapping, page, free) do {} while (0)
+static inline bool hwpoison_partial_overwrite(struct address_space *mapping,
+                               loff_t pos, size_t count)
+{
+       return false;
+}
 #endif /* CONFIG_MEMORY_FAILURE */
 
 #endif /* _LINUX_PAGEMAP_H */
diff --git v3.14-rc6.orig/mm/filemap.c v3.14-rc6/mm/filemap.c
index 887f2dfaf185..f58b36e313ad 100644
--- v3.14-rc6.orig/mm/filemap.c
+++ v3.14-rc6/mm/filemap.c
@@ -2110,8 +2110,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
         if (unlikely(*pos < 0))
                 return -EINVAL;
 
-       if (unlikely(mapping_hwpoisoned_range(file->f_mapping, *pos,
-                                             *pos + *count)))
+       if (unlikely(hwpoison_partial_overwrite(file->f_mapping, *pos, *count)))
                return -EHWPOISON;
 
        if (!isblk) {
@@ -2222,7 +2221,13 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
        written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-       if (written)
+       /*
+        * When the write range includes a hwpoisoned region (so written is
+        * -EHWPOISON), generic_write_checks() has already confirmed that it
+        * is a full page overwrite and we can safely invalidate the error,
+        * so the write does not have to fail.
+        */
+       if (written && written != -EHWPOISON)
                goto out;
 
        /*
@@ -2362,6 +2367,9 @@ static ssize_t generic_perform_write(struct file *file,
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
 
+               if (page_quasi_hwpoisoned(mapping, page))
+                       hwpoison_resolve_pagecache_error(mapping, page, false);
+
                pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
                pagefault_enable();
diff --git v3.14-rc6.orig/mm/memory-failure.c v3.14-rc6/mm/memory-failure.c
index 34f2c046af22..0eca5449d251 100644
--- v3.14-rc6.orig/mm/memory-failure.c
+++ v3.14-rc6/mm/memory-failure.c
@@ -665,6 +665,57 @@ static void clear_pagecache_tag_hwpoison(struct address_space *mapping,
        spin_unlock_irq(&mapping->tree_lock);
 }
 
+inline bool page_quasi_hwpoisoned(struct address_space *mapping,
+                                       struct page *page)
+{
+       if (!sysctl_memory_failure_recovery)
+               return false;
+       return unlikely(get_pagecache_tag_hwpoison(mapping, page_index(page)));
+}
+
+/*
+ * This function clears a quasi-hwpoisoned page and turns it into a normal
+ * LRU page. Callers should check that @page is really quasi-hwpoisoned,
+ * and must not call this for real error pages.
+ */
+void hwpoison_resolve_pagecache_error(struct address_space *mapping,
+                                     struct page *page, bool free)
+{
+       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON(!PageLocked(page));
+
+       ClearPageHWPoison(page);
+       clear_pagecache_tag_hwpoison(mapping, page_index(page));
+       dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page));
+       putback_lru_page(page);
+       if (free) {
+               lru_add_drain_all();
+               delete_from_page_cache(page);
+       }
+       iput(mapping->host);
+}
+
+/*
+ * Return true if a given range [pos, pos+count) *partially* overlaps with
+ * a hwpoisoned page. Effectively it checks only the boundary pages for overlap.
+ */
+bool hwpoison_partial_overwrite(struct address_space *mapping,
+                               loff_t pos, size_t count)
+{
+       if (!sysctl_memory_failure_recovery)
+               return false;
+       if (!mapping_hwpoisoned_range(mapping, pos, pos + count))
+               return false;
+
+       if (!PAGE_ALIGNED(pos) &&
+           get_pagecache_tag_hwpoison(mapping, pos >> PAGE_SHIFT))
+               return true;
+       if (!PAGE_ALIGNED(pos + count) &&
+           get_pagecache_tag_hwpoison(mapping, (pos + count) >> PAGE_SHIFT))
+               return true;
+       return false;
+}
+
 /*
  * Dirty pagecache page
  *
@@ -691,7 +742,10 @@ static void clear_pagecache_tag_hwpoison(struct address_space *mapping,
  *
  * This quasi-hwpoisoned page works to keep reporting the error for all
  * processes which try to access to the error address until it is resolved
- * or the system reboots.
+ * or the system reboots. Quasi-hwpoisoned pages can be resolved by unpoison,
+ * truncate, or full page overwrite. With a full page overwrite, the quasi-
+ * hwpoisoned pages safely turn back into normal LRU pages, so we expect
+ * userspace to do this, if possible, when it receives the error report.
  *
  * Issues: when the error hit a hole page the error is not properly
  * propagated.
@@ -1496,12 +1550,18 @@ int unpoison_memory(unsigned long pfn)
         * the free buddy page pool.
         */
        if (TestClearPageHWPoison(page)) {
+               struct address_space *mapping = page_mapping(page);
+               if (mapping && page_quasi_hwpoisoned(mapping, page)) {
+                       hwpoison_resolve_pagecache_error(mapping, page, true);
+                       goto unlock;
+               }
                pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
                atomic_long_sub(nr_pages, &num_poisoned_pages);
                freeit = 1;
                if (PageHuge(page))
                        clear_page_hwpoison_huge_page(page);
        }
+unlock:
        unlock_page(page);
 
        put_page(page);
diff --git v3.14-rc6.orig/mm/truncate.c v3.14-rc6/mm/truncate.c
index 353b683afd6e..92d7097dfc6d 100644
--- v3.14-rc6.orig/mm/truncate.c
+++ v3.14-rc6/mm/truncate.c
@@ -103,6 +103,10 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
        cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
        ClearPageMappedToDisk(page);
+
+       if (page_quasi_hwpoisoned(mapping, page))
+               hwpoison_resolve_pagecache_error(mapping, page, false);
+
        delete_from_page_cache(page);
        return 0;
 }
@@ -439,6 +443,9 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
                return 0;
 
+       if (page_quasi_hwpoisoned(mapping, page))
+               hwpoison_resolve_pagecache_error(mapping, page, false);
+
        spin_lock_irq(&mapping->tree_lock);
        if (PageDirty(page))
                goto failed;
-- 
1.8.5.3
