Samuel Thibault, le sam. 27 août 2022 20:26:10 +0200, a ecrit: > I can't manage to make this boot at all. Is there a particular configure > option to pass? I have let it just use the linux 64bit compiler, I have > tried with --host=x86_64-gnu, in all cases after pressing enter at the > grub menu it loads the kernel but just reboots immediately after that, > even if I have put a jmp boot_entry at boot_entry, i.e. it's really the > load that is a problem and not any gnumach code.
Actually, building with 32bit without your changes brings me the same issue, oh joy... Samuel > Luca Dariz, le sam. 05 févr. 2022 18:51:24 +0100, a ecrit: > > * configure: compile for native x86_64 by default instead of xen > > * x86_64/Makefrag.am: introduce KERNEL_MAP_BASE to reuse the constant > > in both code and linker script > > * x86_64/ldscript: use a .boot section for the very first operations, > > until we reach long mode. This section is not really allocated, so > > it doesn't need to be freed later. The vm system is later > > initialized starting from .text and not including .boot > > * link kernel at 0x40000000 as the xen version, higher values cause > > linker errors > > * we can't use full segmentation in long mode, so we need to create a > > temporary mapping during early boot to be able to jump to high > > addresses > > * build direct map for first 4G in boothdr, it seems required by Linux > > drivers > > * add INTEL_PTE_PS bit definition to enable 2MB pages during bootstrap > > * ensure write bit is set in PDP entry access rights. This only > > applies to PAE-enabled kernels, mandatory for x86_64. 
On xen > > platform it seems to be handled differently > > > > Signed-off-by: Luca Dariz <l...@orpolo.org> > > --- > > configure.ac | 3 +- > > i386/configfrag.ac | 2 + > > i386/i386/i386asm.sym | 1 + > > i386/i386/vm_param.h | 2 +- > > i386/intel/pmap.c | 4 +- > > i386/intel/pmap.h | 1 + > > x86_64/Makefrag.am | 18 +++- > > x86_64/boothdr.S | 238 ++++++++++++++++++++++++++++++++++++++++++ > > x86_64/ldscript | 28 +++-- > > 9 files changed, 281 insertions(+), 16 deletions(-) > > create mode 100644 x86_64/boothdr.S > > > > diff --git a/configure.ac b/configure.ac > > index 019842db..3aaa935c 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -56,8 +56,7 @@ case $host_platform:$host_cpu in > > default:i?86) > > host_platform=at;; > > default:x86_64)] > > - AC_MSG_WARN([Platform set to Xen by default, this can not boot on > > non-Xen systems, you currently need a 32bit build for that.]) > > - [host_platform=xen;; > > + [host_platform=at;; > > at:i?86 | xen:i?86 | at:x86_64 | xen:x86_64) > > :;; > > *)] > > diff --git a/i386/configfrag.ac b/i386/configfrag.ac > > index f697e277..f07a98ca 100644 > > --- a/i386/configfrag.ac > > +++ b/i386/configfrag.ac > > @@ -106,6 +106,8 @@ AC_ARG_ENABLE([apic], > > enable_pae=${enable_pae-yes};; > > *:i?86) > > :;; > > + *:x86_64) > > + enable_pae=${enable_pae-yes};; > > *) > > if [ x"$enable_pae" = xyes ]; then] > > AC_MSG_ERROR([can only enable the `PAE' feature on ix86.]) > > diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym > > index 0662aea0..9e1d13d7 100644 > > --- a/i386/i386/i386asm.sym > > +++ b/i386/i386/i386asm.sym > > @@ -122,6 +122,7 @@ expr sizeof(pt_entry_t) > > PTE_SIZE > > expr INTEL_PTE_PFN PTE_PFN > > expr INTEL_PTE_VALID PTE_V > > expr INTEL_PTE_WRITE PTE_W > > +expr INTEL_PTE_PS PTE_S > > expr ~INTEL_PTE_VALID PTE_INVALID > > expr NPTES PTES_PER_PAGE > > expr INTEL_PTE_VALID|INTEL_PTE_WRITE INTEL_PTE_KERNEL > > diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h > > index edd9522c..314fdb35 
100644 > > --- a/i386/i386/vm_param.h > > +++ b/i386/i386/vm_param.h > > @@ -36,7 +36,7 @@ > > * for better trace support in kdb; the _START symbol has to be offset by > > the > > * same amount. */ > > #ifdef __x86_64__ > > -#define VM_MIN_KERNEL_ADDRESS 0x40000000UL > > +#define VM_MIN_KERNEL_ADDRESS KERNEL_MAP_BASE > > #else > > #define VM_MIN_KERNEL_ADDRESS 0xC0000000UL > > #endif > > diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c > > index 3bf00659..d0bd3b5d 100644 > > --- a/i386/intel/pmap.c > > +++ b/i386/intel/pmap.c > > @@ -655,7 +655,7 @@ void pmap_bootstrap(void) > > pa_to_pte(_kvtophys((void *) kernel_page_dir > > + i * INTEL_PGBYTES)) > > | INTEL_PTE_VALID > > -#ifdef MACH_PV_PAGETABLES > > +#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES) > > | INTEL_PTE_WRITE > > #endif > > ); > > @@ -1297,7 +1297,7 @@ pmap_t pmap_create(vm_size_t size) > > WRITE_PTE(&p->pdpbase[i], > > pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) > > | INTEL_PTE_VALID > > -#ifdef MACH_PV_PAGETABLES > > +#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES) > > | INTEL_PTE_WRITE > > #endif > > ); > > diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h > > index f24b3a71..b93c4ad4 100644 > > --- a/i386/intel/pmap.h > > +++ b/i386/intel/pmap.h > > @@ -148,6 +148,7 @@ typedef phys_addr_t pt_entry_t; > > #define INTEL_PTE_NCACHE 0x00000010 > > #define INTEL_PTE_REF 0x00000020 > > #define INTEL_PTE_MOD 0x00000040 > > +#define INTEL_PTE_PS 0x00000080 > > #ifdef MACH_PV_PAGETABLES > > /* Not supported */ > > #define INTEL_PTE_GLOBAL 0x00000000 > > diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am > > index 40b50bc9..5da734de 100644 > > --- a/x86_64/Makefrag.am > > +++ b/x86_64/Makefrag.am > > @@ -207,11 +207,27 @@ nodist_libkernel_a_SOURCES += \ > > > > EXTRA_DIST += \ > > x86_64/ldscript > > + > > if PLATFORM_at > > +# This should probably be 0xffffffff80000000 for mcmodel=kernel, but let's > > try > > +# to stay in the first 8G first, otherwise we have to fix the pmap module > 
> to > > +# actually use the l4 page level > > +#KERNEL_MAP_BASE=0x100000000 > > +# but for nor try with < 4G, otherwise we have linker errors > > +KERNEL_MAP_BASE=0x40000000 > > gnumach_LINKFLAGS += \ > > --defsym _START_MAP=$(_START_MAP) \ > > - --defsym _START=_START_MAP+0x40000000 \ > > + --defsym _START=_START_MAP \ > > + --defsym KERNEL_MAP_BASE=$(KERNEL_MAP_BASE) \ > > -T '$(srcdir)'/x86_64/ldscript > > + > > +AM_CFLAGS += -D_START_MAP=$(_START_MAP) \ > > + -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE) > > +AM_CCASFLAGS += -D_START_MAP=$(_START_MAP) \ > > + -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE) > > + > > +AM_CCASFLAGS += \ > > + -Ii386 > > endif > > > > AM_CPPFLAGS += \ > > diff --git a/x86_64/boothdr.S b/x86_64/boothdr.S > > new file mode 100644 > > index 00000000..12fc7ca2 > > --- /dev/null > > +++ b/x86_64/boothdr.S > > @@ -0,0 +1,238 @@ > > +/* > > + * Copyright (C) 2022 Free Software Foundation > > + * > > + * This program is free software ; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License as published by > > + * the Free Software Foundation ; either version 2 of the License, or > > + * (at your option) any later version. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY ; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with the program ; if not, write to the Free Software > > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > > + */ > > + > > +#include <mach/machine/asm.h> > > + > > +#include <i386/i386asm.h> > > +#include <i386/i386/proc_reg.h> > > +#include <i386/i386/seg.h> > > + /* > > + * This section will be put first into .boot. See also x86_64/ldscript. 
> > + */ > > + .section .boot.text,"ax" > > + .globl boot_start > > + > > + /* We should never be entered this way. */ > > + .code32 > > +boot_start: > > + jmp boot_entry > > + > > + /* MultiBoot header - see multiboot.h. */ > > +#define MULTIBOOT_MAGIC 0x1BADB002 > > +#ifdef __ELF__ > > +#define MULTIBOOT_FLAGS 0x00000003 > > +#else /* __ELF__ */ > > +#define MULTIBOOT_FLAGS 0x00010003 > > +#endif /* __ELF__ */ > > + P2ALIGN(2) > > +boot_hdr: > > + .long MULTIBOOT_MAGIC > > + .long MULTIBOOT_FLAGS > > + /* > > + * The next item here is the checksum. > > + * XX this works OK until we need at least the 30th bit. > > + */ > > + .long - (MULTIBOOT_MAGIC+MULTIBOOT_FLAGS) > > +#ifndef __ELF__ /* a.out kludge */ > > + .long boot_hdr /* header_addr */ > > + .long _start /* load_addr */ > > + .long _edata /* load_end_addr */ > > + .long _end /* bss_end_addr */ > > + .long boot_entry /* entry */ > > +#endif /* __ELF__ */ > > + > > +boot_entry: > > + /* > > + * Prepare minimal page mapping to jump to 64 bit and to C code. > > + * The first 4GB is identity mapped, and the first 2GB are re-mapped > > + * to high addresses at KERNEL_MAP_BASE > > + */ > > + > > + movl $p3table,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p4table) > > + /* > > + * Fill 4 entries in L3 table to cover the whole 32-bit 4GB address > > + * space. Part of it might be remapped later if the kernel is mapped > > + * below 4G. 
> > + */ > > + movl $p2table,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3table) > > + movl $p2table1,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3table + 8) > > + movl $p2table2,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3table + 16) > > + movl $p2table3,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3table + 24) > > + /* point each page table level two entry to a page */ > > + mov $0,%ecx > > +.map_p2_table: > > + mov $0x200000,%eax // 2MiB page, should be always available > > + mul %ecx > > + or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k > > + mov %eax,p2table(,%ecx,8) > > + inc %ecx > > + cmp $2048,%ecx // 512 entries per table, map 4 L2 tables > > + jne .map_p2_table > > + > > + /* > > + * KERNEL_MAP_BASE must be aligned to 2GB. > > + * Depending on kernel starting address, we might need to add another > > + * entry in the L4 table (controlling 512 GB chunks). In any case, we > > + * add two entries in L3 table to make sure we map 2GB for the kernel. > > + * Note that this may override part of the mapping created above. 
> > + */ > > +.kernel_map: > > +#if KERNEL_MAP_BASE >= (1U << 39) > > + movl $p3ktable,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // > > select 512G block > > + movl $p2ktable1,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // > > select first 1G block > > + movl $p2ktable2,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) > > )) // select second 1G block > > +#else > > + movl $p2ktable1,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // > > select first 1G block > > + movl $p2ktable2,%eax > > + or $(PTE_V|PTE_W),%eax > > + movl %eax,(p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) > > // select second 1G block > > +#endif > > + > > + mov $0,%ecx > > +.map_p2k_table: > > + mov $0x200000,%eax // 2MiB page, should be always available > > + mul %ecx > > + or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K > > + mov %eax,p2ktable1(,%ecx,8) > > + inc %ecx > > + cmp $1024,%ecx // 512 entries per table, map 2 L2 tables > > + jne .map_p2k_table > > + > > +switch64: > > + /* > > + * Jump to 64 bit mode, we have to > > + * - enable PAE > > + * - enable long mode > > + * - enable paging and load the tables filled above in CR3 > > + * - jump to a 64-bit code segment > > + */ > > + mov %cr4,%eax > > + or $CR4_PAE,%eax > > + mov %eax,%cr4 > > + mov $0xC0000080,%ecx // select EFER register > > + rdmsr > > + or $(1 << 8),%eax // long mode enable bit > > + wrmsr > > + mov $p4table,%eax > > + mov %eax,%cr3 > > + mov %cr0,%eax > > + or $CR0_PG,%eax > > + or $CR0_WP,%eax > > + mov %eax,%cr0 > > + > > + lgdt gdt64pointer > > + movw $0,%ax > > + movw %ax,%fs > > + movw %ax,%gs > > + movw $16,%ax > > + movw %ax,%ds > > + movw %ax,%es > > + movw %ax,%ss > > + ljmp $8,$boot_entry64 > > + > > + .code64 > > + > > + /* why do we need this? 
it seems overwritten by linker */ > > + .globl _start > > +_start: > > + > > +boot_entry64: > > + /* Switch to our own interrupt stack. */ > > + movq $(_intstack+INTSTACK_SIZE),%rax > > + andq $(~15),%rax > > + movq %rax,%rsp > > + > > + /* Reset EFLAGS to a known state. */ > > + pushq $0 > > + popf > > + /* save multiboot info for later */ > > + movq %rbx,%r8 > > + > > + /* Fix ifunc entries */ > > + movq $__rela_iplt_start,%rsi > > + movq $__rela_iplt_end,%rdi > > +iplt_cont: > > + cmpq %rdi,%rsi > > + jae iplt_done > > + movq (%rsi),%rbx /* r_offset */ > > + movb 4(%rsi),%al /* info */ > > + cmpb $42,%al /* IRELATIVE */ > > + jnz iplt_next > > + call *(%ebx) /* call ifunc */ > > + movq %rax,(%rbx) /* fixed address */ > > +iplt_next: > > + addq $8,%rsi > > + jmp iplt_cont > > +iplt_done: > > + > > + /* restore multiboot info */ > > + movq %r8,%rdi > > + /* Jump into C code. */ > > + call EXT(c_boot_entry) > > + /* not reached */ > > + nop > > + > > + .section .boot.data > > + .comm _intstack,INTSTACK_SIZE > > + > > + .code32 > > + .section .boot.data > > + .align 4096 > > +#define SEG_ACCESS_OFS 40 > > +#define SEG_GRANULARITY_OFS 52 > > +gdt64: > > + .quad 0 > > +gdt64code: > > + .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_CODE_R << > > SEG_ACCESS_OFS) | (SZ_64 << SEG_GRANULARITY_OFS) > > +gdt64data: > > + .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_DATA_W << > > SEG_ACCESS_OFS) > > +gdt64end: > > + .skip (4096 - (gdt64end - gdt64)) > > +gdt64pointer: > > + .word gdt64end - gdt64 - 1 > > + .quad gdt64 > > + > > + .section .boot.data > > + .align 4096 > > +p4table: .space 4096 > > +p3table: .space 4096 > > +p2table: .space 4096 > > +p2table1: .space 4096 > > +p2table2: .space 4096 > > +p2table3: .space 4096 > > +p3ktable: .space 4096 > > +p2ktable1: .space 4096 > > +p2ktable2: .space 4096 > > diff --git a/x86_64/ldscript b/x86_64/ldscript > > index 375e8104..de99795e 100644 > > --- a/x86_64/ldscript > > +++ b/x86_64/ldscript > > @@ -2,7 +2,7 @@ > > 
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", > > "elf64-x86-64") > > OUTPUT_ARCH(i386:x86-64) > > -ENTRY(_start) > > +ENTRY(boot_start) > > SECTIONS > > { > > /* > > @@ -11,22 +11,30 @@ SECTIONS > > * be first in there. See also `i386/i386at/boothdr.S' and > > * `gnumach_LINKFLAGS' in `i386/Makefrag.am'. > > */ > > - . = _START; > > - .text : > > - AT (_START_MAP) > > + > > + . = _START_MAP; > > + .boot : > > + { > > + *(.boot.text) > > + *(.boot.data) > > + } =0x90909090 > > + > > + . += KERNEL_MAP_BASE; > > + _start = .; > > + .text : AT(((ADDR(.text)) - KERNEL_MAP_BASE)) > > { > > - *(.text.start) > > + *(.text*) > > *(.text .stub .text.* .gnu.linkonce.t.*) > > *(.text.unlikely .text.*_unlikely) > > KEEP (*(.text.*personality*)) > > /* .gnu.warning sections are handled specially by elf32.em. */ > > *(.gnu.warning) > > } =0x90909090 > > - .init : > > + .init : AT(((ADDR(.init)) - KERNEL_MAP_BASE)) > > { > > KEEP (*(.init)) > > } =0x90909090 > > - .fini : > > + .fini : AT(((ADDR(.fini)) - KERNEL_MAP_BASE)) > > { > > KEEP (*(.fini)) > > } =0x90909090 > > @@ -69,7 +77,7 @@ SECTIONS > > PROVIDE_HIDDEN (__rela_iplt_end = .); > > } > > .plt : { *(.plt) *(.iplt) } > > - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } > > + .rodata : AT(((ADDR(.rodata)) - KERNEL_MAP_BASE)) { *(.rodata > > .rodata.* .gnu.linkonce.r.*) } > > .rodata1 : { *(.rodata1) } > > .eh_frame_hdr : { *(.eh_frame_hdr) } > > .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } > > @@ -139,7 +147,7 @@ SECTIONS > > .got : { *(.got) *(.igot) } > > . 
= DATA_SEGMENT_RELRO_END (24, .); > > .got.plt : { *(.got.plt) *(.igot.plt) } > > - .data : > > + .data : AT(((ADDR(.data)) - KERNEL_MAP_BASE)) > > { > > *(.data .data.* .gnu.linkonce.d.*) > > SORT(CONSTRUCTORS) > > @@ -147,7 +155,7 @@ SECTIONS > > .data1 : { *(.data1) } > > _edata = .; PROVIDE (edata = .); > > __bss_start = .; > > - .bss : > > + .bss : AT(((ADDR(.bss)) - KERNEL_MAP_BASE)) > > { > > *(.dynbss) > > *(.bss .bss.* .gnu.linkonce.b.*) > > -- > > 2.30.2 > > > > > > -- > Samuel > --- > Pour une évaluation indépendante, transparente et rigoureuse ! > Je soutiens la Commission d'Évaluation de l'Inria. -- Samuel --- Pour une évaluation indépendante, transparente et rigoureuse ! Je soutiens la Commission d'Évaluation de l'Inria.