Subject: [PATCH] x86, 64bit: use #PF handler to setup page table for data

There are two cases in arch/x86/kernel/head_64.S where we need to access
a data area that is not yet mapped:
a. loading microcode from the initrd
b. when zero_page and command_line are loaded high, above 1G.

With this change, we no longer need to run ioremap_init ahead of time...

The pgt buffer is from the BRK area, and we have enough space there.

Also, init_mem_mapping will later reuse those page tables.

Most of this patch is from HPA.
Changes from Yinghai:
1. use it with BRK
2. only map 2M at a time, because zero_page and the command line are
   very small, and the microcode should be small too (128k?);
   this also avoids hitting a possible hole that should not be mapped.
3. make it work with kexec when phys_base is not zero.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 arch/x86/kernel/head64.c  |   54 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/head_64.S |   13 ++++++++---
 2 files changed, 64 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -26,6 +26,60 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 
+/* Create a new PMD entry */
+int __init early_make_pgtable(unsigned long address)
+{
+	unsigned long physaddr = address - __PAGE_OFFSET;
+	unsigned long i;
+	pgdval_t pgd, *pgd_p;
+	pudval_t pud, *pud_p;
+	pmdval_t pmd, *pmd_p;
+
+	if (address < __PAGE_OFFSET || physaddr >= MAXMEM)
+		return -1;	/* Invalid address - puke */
+
+	pgd_p = &init_level4_pgt[pgd_index(address)].pgd;
+	pgd = *pgd_p;
+
+	/*
+	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
+	 * critical -- __PAGE_OFFSET would point us back into the dynamic
+	 * range and we might end up looping forever...
+	 */
+	if (pgd)
+		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if ((char *)(_brk_end + PAGE_SIZE) > __brk_limit)
+			return -1;
+		pud_p = (pudval_t *)_brk_end;
+		_brk_end += PAGE_SIZE;
+
+		for (i = 0; i < PTRS_PER_PUD; i++)
+			pud_p[i] = 0;
+		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	pud_p += pud_index(address);
+	pud = *pud_p;
+
+	if (pud)
+		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if ((char *)(_brk_end + PAGE_SIZE) > __brk_limit)
+			return -1;
+		pmd_p = (pmdval_t *)_brk_end;
+		_brk_end += PAGE_SIZE;
+
+		for (i = 0; i < PTRS_PER_PMD; i++)
+			pmd_p[i] = 0;
+		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	pmd = (physaddr & PMD_MASK) + __PAGE_KERNEL_LARGE;
+	pmd_p[pmd_index(address)] = pmd;
+
+	return 0;
+}
+
+
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
Index: linux-2.6/arch/x86/kernel/head_64.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_64.S
+++ linux-2.6/arch/x86/kernel/head_64.S
@@ -494,14 +494,21 @@ ENTRY(early_idt_handler)
 	pushq %r11		#  0(%rsp)
 
 	cmpl $__KERNEL_CS,96(%rsp)
-	jne 10f
+	jne 11f
 
+	cmpl $14,72(%rsp)       # Page fault?
+	jnz 10f
+	GET_CR2_INTO(%rdi)      # can clobber any volatile register if pv
+	call early_make_pgtable
+	andl %eax,%eax
+	jz 20f                  # All good
+10:
 	leaq 88(%rsp),%rdi	# Pointer to %rip
 	call early_fixup_exception
 	andl %eax,%eax
 	jnz 20f			# Found an exception entry
 
-10:
+11:
 #ifdef CONFIG_EARLY_PRINTK
 	GET_CR2_INTO(%r9)	# can clobber any volatile register if pv
 	movl 80(%rsp),%r8d	# error code
@@ -523,7 +530,7 @@ ENTRY(early_idt_handler)
 1:	hlt
 	jmp 1b
 
-20:	# Exception table entry found
+20:	# Exception table entry found or page table generated.
 	popq %r11
 	popq %r10
 	popq %r9
