Mark poisoned page as not present, and to reverse the 'np' effect,
restate the _PAGE_PRESENT bit. Please refer to discussions here for
reason behind the decision.
https://lore.kernel.org/all/capcyv4hrxpb1tasbzug-ggdvs0oofkxmxlihmktg_kfi7yb...@mail.gmail.com/

Now since poisoned page is marked as not-present, in order to
avoid writing to a 'np' page and trigger kernel Oops, also fix
pmem_do_write().

Fixes: 284ce4011ba6 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()")
Signed-off-by: Jane Chu <[email protected]>
---
 arch/x86/kernel/cpu/mce/core.c |  6 +++---
 arch/x86/mm/pat/set_memory.c   | 21 +++++++++------------
 drivers/nvdimm/pmem.c          | 31 +++++++------------------------
 include/linux/set_memory.h     |  4 ++--
 4 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..8d12739f283d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -613,7 +613,7 @@ static int uc_decode_notifier(struct notifier_block *nb, 
unsigned long val,
 
        pfn = mce->addr >> PAGE_SHIFT;
        if (!memory_failure(pfn, 0)) {
-               set_mce_nospec(pfn, whole_page(mce));
+               set_mce_nospec(pfn);
                mce->kflags |= MCE_HANDLED_UC;
        }
 
@@ -1297,7 +1297,7 @@ static void kill_me_maybe(struct callback_head *cb)
 
        ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
        if (!ret) {
-               set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+               set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
                sync_core();
                return;
        }
@@ -1321,7 +1321,7 @@ static void kill_me_never(struct callback_head *cb)
        p->mce_count = 0;
        pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
        if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
-               set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+               set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
 }
 
 static void queue_task_work(struct mce *m, char *msg, void (*func)(struct 
callback_head *))
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 9abc6077d768..747614c3dd83 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1925,14 +1925,14 @@ int set_memory_wb(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL(set_memory_wb);
 
+static int _set_memory_present(unsigned long addr, int numpages)
+{
+       return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 
0);
+}
+
 #ifdef CONFIG_X86_64
-/*
- * Prevent speculative access to the page by either unmapping
- * it (if we do not require access to any part of the page) or
- * marking it uncacheable (if we want to try to retrieve data
- * from non-poisoned lines in the page).
- */
-int set_mce_nospec(unsigned long pfn, bool unmap)
+/* Prevent speculative access to a page by marking it not-present */
+int set_mce_nospec(unsigned long pfn)
 {
        unsigned long decoy_addr;
        int rc;
@@ -1954,10 +1954,7 @@ int set_mce_nospec(unsigned long pfn, bool unmap)
         */
        decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
 
-       if (unmap)
-               rc = set_memory_np(decoy_addr, 1);
-       else
-               rc = set_memory_uc(decoy_addr, 1);
+       rc = set_memory_np(decoy_addr, 1);
        if (rc)
                pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
        return rc;
@@ -1967,7 +1964,7 @@ EXPORT_SYMBOL(set_mce_nospec);
 /* Restore full speculative operation to the pfn. */
 int clear_mce_nospec(unsigned long pfn)
 {
-       return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
+       return _set_memory_present((unsigned long) pfn_to_kaddr(pfn), 1);
 }
 EXPORT_SYMBOL(clear_mce_nospec);
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 58d95242a836..30c71a68175b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -158,36 +158,19 @@ static blk_status_t pmem_do_write(struct pmem_device 
*pmem,
                        struct page *page, unsigned int page_off,
                        sector_t sector, unsigned int len)
 {
-       blk_status_t rc = BLK_STS_OK;
-       bool bad_pmem = false;
        phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
        void *pmem_addr = pmem->virt_addr + pmem_off;
 
-       if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
-               bad_pmem = true;
+       if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
+               blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);
 
-       /*
-        * Note that we write the data both before and after
-        * clearing poison.  The write before clear poison
-        * handles situations where the latest written data is
-        * preserved and the clear poison operation simply marks
-        * the address range as valid without changing the data.
-        * In this case application software can assume that an
-        * interrupted write will either return the new good
-        * data or an error.
-        *
-        * However, if pmem_clear_poison() leaves the data in an
-        * indeterminate state we need to perform the write
-        * after clear poison.
-        */
+               if (rc != BLK_STS_OK)
+                       pr_warn_ratelimited("%s: failed to clear poison\n", 
__func__);
+                       return rc;
+       }
        flush_dcache_page(page);
        write_pmem(pmem_addr, page, page_off, len);
-       if (unlikely(bad_pmem)) {
-               rc = pmem_clear_poison(pmem, pmem_off, len);
-               write_pmem(pmem_addr, page, page_off, len);
-       }
-
-       return rc;
+       return BLK_STS_OK;
 }
 
 static void pmem_submit_bio(struct bio *bio)
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index d6263d7afb55..cde2d8687a7b 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -43,10 +43,10 @@ static inline bool can_set_direct_map(void)
 #endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
 
 #ifdef CONFIG_X86_64
-int set_mce_nospec(unsigned long pfn, bool unmap);
+int set_mce_nospec(unsigned long pfn);
 int clear_mce_nospec(unsigned long pfn);
 #else
-static inline int set_mce_nospec(unsigned long pfn, bool unmap)
+static inline int set_mce_nospec(unsigned long pfn)
 {
        return 0;
 }
-- 
2.18.4


Reply via email to