The following commit 0a9fe8ca844d ("x86/mm: Validate 
kernel_physical_mapping_init()
PTE population") triggers the below warning in the SEV guest.

WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgalloc.h:87 
phys_pmd_init+0x30d/0x386
Call Trace:
 kernel_physical_mapping_init+0xce/0x259
 early_set_memory_enc_dec+0x10f/0x160
 kvm_smp_prepare_boot_cpu+0x71/0x9d
 start_kernel+0x1c9/0x50b
 secondary_startup_64+0xa4/0xb0

The SEV guest calls kernel_physical_mapping_init() to clear the encryption
mask from an existing mapping. While clearing the encryption mask
kernel_physical_mapping_init() splits the large pages into the smaller.
To split the page, the kernel_physical_mapping_init() allocates a new page
and updates the existing entry. The set_{pud,pmd}_safe triggers warning
when updating the entry with page in the present state.

Add a new kernel_physical_mapping_change() which uses the non-safe variants
of set_{pmd,pud,p4d}() and {pmd,pud,p4d}_populate() routines when updating
the entry. Since the kernel_physical_mapping_change() may replace the
existing entry with a new entry so the caller is responsible to issue the
TLB flushes. Update the early_set_memory_enc_dec() to use
kernel_physical_mapping_change() when it wants to clear the memory
encryption mask from the page table entry.

Signed-off-by: Brijesh Singh <brijesh.si...@amd.com>
Fixes: 0a9fe8ca844d (x86/mm: Validate kernel_physical_mapping_init() ...)
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
Cc: Peter Zijlstra (Intel) <pet...@infradead.org>
Cc: Andy Lutomirski <l...@kernel.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Tom Lendacky <thomas.lenda...@amd.com>
---

Changes since v1:
 - add kernel_physical_mapping_change() which uses non-safe variants of
   set_{pmd,pud,pte}.

 arch/x86/mm/init_64.c     | 137 ++++++++++++++++++++++++++++----------
 arch/x86/mm/mem_encrypt.c |   6 +-
 arch/x86/mm/mm_internal.h |   3 +
 3 files changed, 106 insertions(+), 40 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bccff68e3267..a2b6df99c4be 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,6 +58,37 @@
 
 #include "ident_map.c"
 
+#define DEFINE_POPULATE(fname, type1, type2, safe)             \
+static inline void __##fname(struct mm_struct *mm,             \
+               type1##_t *arg1, type2##_t *arg2, bool safe)    \
+{                                                              \
+       if (safe)                                               \
+               fname##_safe(mm, arg1, arg2);                   \
+       else                                                    \
+               fname(mm, arg1, arg2);                          \
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, safe)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, safe)
+DEFINE_POPULATE(pud_populate, pud, pmd, safe)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, safe)
+
+#define DEFINE_ENTRY(type1, type2, safe)                       \
+static inline void __set_##type1(type1##_t *arg1,              \
+                       type2##_t arg2, bool safe)              \
+{                                                              \
+       if (safe)                                               \
+               set_##type1##_safe(arg1, arg2);                 \
+       else                                                    \
+               set_##type1(arg1, arg2);                        \
+}
+
+DEFINE_ENTRY(p4d, p4d, safe)
+DEFINE_ENTRY(pud, pud, safe)
+DEFINE_ENTRY(pmd, pmd, safe)
+DEFINE_ENTRY(pte, pte, safe)
+
+
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
  */
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
-             pgprot_t prot)
+             pgprot_t prot, bool safe)
 {
        unsigned long pages = 0, paddr_next;
        unsigned long paddr_last = paddr_end;
@@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, 
unsigned long paddr_end,
                                             E820_TYPE_RAM) &&
                            !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
                                             E820_TYPE_RESERVED_KERN))
-                               set_pte_safe(pte, __pte(0));
+                               __set_pte(pte, __pte(0), safe);
                        continue;
                }
 
@@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, 
unsigned long paddr_end,
                        pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
                                pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
                pages++;
-               set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+               __set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), safe);
                paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
        }
 
@@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, 
unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
-             unsigned long page_size_mask, pgprot_t prot)
+             unsigned long page_size_mask, pgprot_t prot, bool safe)
 {
        unsigned long pages = 0, paddr_next;
        unsigned long paddr_last = paddr_end;
@@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, 
unsigned long paddr_end,
                                             E820_TYPE_RAM) &&
                            !e820__mapped_any(paddr & PMD_MASK, paddr_next,
                                             E820_TYPE_RESERVED_KERN))
-                               set_pmd_safe(pmd, __pmd(0));
+                               __set_pmd(pmd, __pmd(0), safe);
                        continue;
                }
 
@@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, 
unsigned long paddr_end,
                                spin_lock(&init_mm.page_table_lock);
                                pte = (pte_t *)pmd_page_vaddr(*pmd);
                                paddr_last = phys_pte_init(pte, paddr,
-                                                          paddr_end, prot);
+                                                          paddr_end, prot,
+                                                          safe);
                                spin_unlock(&init_mm.page_table_lock);
                                continue;
                        }
@@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, 
unsigned long paddr_end,
                if (page_size_mask & (1<<PG_LEVEL_2M)) {
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
-                       set_pte_safe((pte_t *)pmd,
-                               pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-                                       __pgprot(pgprot_val(prot) | 
_PAGE_PSE)));
+                       __set_pte((pte_t *)pmd,
+                                 pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+                                         __pgprot(pgprot_val(prot) | 
_PAGE_PSE)),
+                                 safe);
                        spin_unlock(&init_mm.page_table_lock);
                        paddr_last = paddr_next;
                        continue;
                }
 
                pte = alloc_low_page();
-               paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+               paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, 
safe);
 
                spin_lock(&init_mm.page_table_lock);
-               pmd_populate_kernel_safe(&init_mm, pmd, pte);
+               __pmd_populate_kernel(&init_mm, pmd, pte, safe);
                spin_unlock(&init_mm.page_table_lock);
        }
        update_page_count(PG_LEVEL_2M, pages);
@@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, 
unsigned long paddr_end,
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
-             unsigned long page_size_mask)
+             unsigned long page_size_mask, bool safe)
 {
        unsigned long pages = 0, paddr_next;
        unsigned long paddr_last = paddr_end;
@@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, 
unsigned long paddr_end,
                                             E820_TYPE_RAM) &&
                            !e820__mapped_any(paddr & PUD_MASK, paddr_next,
                                             E820_TYPE_RESERVED_KERN))
-                               set_pud_safe(pud, __pud(0));
+                               __set_pud(pud, __pud(0), safe);
                        continue;
                }
 
@@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, 
unsigned long paddr_end,
                                paddr_last = phys_pmd_init(pmd, paddr,
                                                           paddr_end,
                                                           page_size_mask,
-                                                          prot);
+                                                          prot, safe);
                                continue;
                        }
                        /*
@@ -610,9 +643,9 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, 
unsigned long paddr_end,
                if (page_size_mask & (1<<PG_LEVEL_1G)) {
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
-                       set_pte_safe((pte_t *)pud,
-                               pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-                                       PAGE_KERNEL_LARGE));
+                       __set_pte((pte_t *)pud,
+                                  pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+                                  PAGE_KERNEL_LARGE), safe);
                        spin_unlock(&init_mm.page_table_lock);
                        paddr_last = paddr_next;
                        continue;
@@ -620,10 +653,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, 
unsigned long paddr_end,
 
                pmd = alloc_low_page();
                paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
-                                          page_size_mask, prot);
+                                          page_size_mask, prot, safe);
 
                spin_lock(&init_mm.page_table_lock);
-               pud_populate_safe(&init_mm, pud, pmd);
+               __pud_populate(&init_mm, pud, pmd, safe);
                spin_unlock(&init_mm.page_table_lock);
        }
 
@@ -634,14 +667,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, 
unsigned long paddr_end,
 
 static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
-             unsigned long page_size_mask)
+             unsigned long page_size_mask, bool safe)
 {
        unsigned long paddr_next, paddr_last = paddr_end;
        unsigned long vaddr = (unsigned long)__va(paddr);
        int i = p4d_index(vaddr);
 
        if (!pgtable_l5_enabled())
-               return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, 
page_size_mask);
+               return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+                                    page_size_mask, safe);
 
        for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
                p4d_t *p4d;
@@ -657,7 +691,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, 
unsigned long paddr_end,
                                             E820_TYPE_RAM) &&
                            !e820__mapped_any(paddr & P4D_MASK, paddr_next,
                                             E820_TYPE_RESERVED_KERN))
-                               set_p4d_safe(p4d, __p4d(0));
+                               __set_p4d(p4d, __p4d(0), safe);
                        continue;
                }
 
@@ -665,31 +699,27 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, 
unsigned long paddr_end,
                        pud = pud_offset(p4d, 0);
                        paddr_last = phys_pud_init(pud, paddr,
                                        paddr_end,
-                                       page_size_mask);
+                                       page_size_mask, safe);
                        continue;
                }
 
                pud = alloc_low_page();
                paddr_last = phys_pud_init(pud, paddr, paddr_end,
-                                          page_size_mask);
+                                          page_size_mask, safe);
 
                spin_lock(&init_mm.page_table_lock);
-               p4d_populate_safe(&init_mm, p4d, pud);
+               __p4d_populate(&init_mm, p4d, pud, safe);
                spin_unlock(&init_mm.page_table_lock);
        }
 
        return paddr_last;
 }
 
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD 
level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
                             unsigned long paddr_end,
-                            unsigned long page_size_mask)
+                            unsigned long page_size_mask,
+                            bool safe)
 {
        bool pgd_changed = false;
        unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
                        p4d = (p4d_t *)pgd_page_vaddr(*pgd);
                        paddr_last = phys_p4d_init(p4d, __pa(vaddr),
                                                   __pa(vaddr_end),
-                                                  page_size_mask);
+                                                  page_size_mask,
+                                                  safe);
                        continue;
                }
 
                p4d = alloc_low_page();
                paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
-                                          page_size_mask);
+                                          page_size_mask, safe);
 
                spin_lock(&init_mm.page_table_lock);
                if (pgtable_l5_enabled())
-                       pgd_populate_safe(&init_mm, pgd, p4d);
+                       __pgd_populate(&init_mm, pgd, p4d, safe);
                else
-                       p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), 
(pud_t *) p4d);
+                       __p4d_populate(&init_mm, p4d_offset(pgd, vaddr),
+                                           (pud_t *) p4d, safe);
+
                spin_unlock(&init_mm.page_table_lock);
                pgd_changed = true;
        }
@@ -732,6 +765,36 @@ kernel_physical_mapping_init(unsigned long paddr_start,
        return paddr_last;
 }
 
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. The virtual and physical addresses have to be aligned on PMD 
level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+                            unsigned long paddr_end,
+                            unsigned long page_size_mask)
+{
+       return __kernel_physical_mapping_init(paddr_start, paddr_end,
+                                             page_size_mask, true);
+}
+
+/*
+ * This function is similar to the kernel_physical_mapping_init() with 
exception
+ * that it uses set_{pud,pmd} instead of the set_{pud,pte}_safe when updating
+ * the mapping. The caller is responsible to flush the TLBs after the function
+ * returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+                              unsigned long paddr_end,
+                              unsigned long page_size_mask)
+{
+       return __kernel_physical_mapping_init(paddr_start, paddr_end,
+                                             page_size_mask, false);
+}
+
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 385afa2b9e17..d17cd3233b7c 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -301,9 +301,9 @@ static int __init early_set_memory_enc_dec(unsigned long 
vaddr,
                else
                        split_page_size_mask = 1 << PG_LEVEL_2M;
 
-               kernel_physical_mapping_init(__pa(vaddr & pmask),
-                                            __pa((vaddr_end & pmask) + psize),
-                                            split_page_size_mask);
+               kernel_physical_mapping_change(__pa(vaddr & pmask),
+                                              __pa((vaddr_end & pmask) + 
psize),
+                                              split_page_size_mask);
        }
 
        ret = 0;
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 319bde386d5f..eeae142062ed 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
 unsigned long kernel_physical_mapping_init(unsigned long start,
                                             unsigned long end,
                                             unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+                                            unsigned long end,
+                                            unsigned long page_size_mask);
 void zone_sizes_init(void);
 
 extern int after_bootmem;
-- 
2.17.1

Reply via email to