Hi Palmer,

On 10/26/21 11:29 PM, Palmer Dabbelt wrote:
> On Sat, 09 Oct 2021 10:20:20 PDT (-0700), a...@ghiti.fr wrote:
>> Arf, I have sent this patchset with the wrong email address. @Palmer
>> tell me if you want me to resend it correctly.
>
> Sorry for being kind of slow here.  It's fine: there's a "From:" in
> the patch, and git picks those up so it'll match the signed-off-by
> line.  I send pretty much all my patches that way, as I never managed
> to get my Google address working correctly.
>
>>
>> Thanks,
>>
>> Alex
>>
>> On 10/9/21 7:12 PM, Alexandre Ghiti wrote:
>>> From: Alexandre Ghiti <a...@ghiti.fr>
>>>
>>> This config allows to compile 64b kernel as PIE and to relocate it at
>>> any virtual address at runtime: this paves the way to KASLR.
>>> Runtime relocation is possible since relocation metadata are embedded into
>>> the kernel.
>
> IMO this should really be user selectable, at a bare minimum so it's
> testable.  I just sent along a patch to do that (my power's off at home,
> so email is a bit wacky right now).
>
> I haven't put this on for-next yet as I'm not sure if you had a fix for the
> kasan issue (which IIUC would conflict with this).
The kasan issue only revealed that I need to move the kasan shadow memory
around with sv48 support; that's not related to the relocatable kernel.

Thanks,

Alex

>
>>> Note that relocating at runtime introduces an overhead even if the
>>> kernel is loaded at the same address it was linked at and that the
>>> compiler options are those used in arm64 which uses the same RELA
>>> relocation format.
>>>
>>> Signed-off-by: Alexandre Ghiti <a...@ghiti.fr>
>>> ---
>>>   arch/riscv/Kconfig              | 12 ++++++++
>>>   arch/riscv/Makefile             |  7 +++--
>>>   arch/riscv/kernel/vmlinux.lds.S |  6 ++++
>>>   arch/riscv/mm/Makefile          |  4 +++
>>>   arch/riscv/mm/init.c            | 54 ++++++++++++++++++++++++++++++++-
>>>   5 files changed, 80 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index ea16fa2dd768..043ba92559fa 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -213,6 +213,18 @@ config PGTABLE_LEVELS
>>>   config LOCKDEP_SUPPORT
>>>       def_bool y
>>>
>>> +config RELOCATABLE
>>> +    bool
>>> +    depends on MMU && 64BIT && !XIP_KERNEL
>>> +    help
>>> +      This builds a kernel as a Position Independent Executable (PIE),
>>> +      which retains all relocation metadata required to relocate the
>>> +      kernel binary at runtime to a different virtual address than the
>>> +      address it was linked at.
>>> +      Since RISCV uses the RELA relocation format, this requires a
>>> +      relocation pass at runtime even if the kernel is loaded at the
>>> +      same address it was linked at.
>>> +
>>>   source "arch/riscv/Kconfig.socs"
>>>   source "arch/riscv/Kconfig.erratas"
>>>
>>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>>> index 0eb4568fbd29..2f509915f246 100644
>>> --- a/arch/riscv/Makefile
>>> +++ b/arch/riscv/Makefile
>>> @@ -9,9 +9,12 @@
>>>   #
>>>
>>>   OBJCOPYFLAGS    := -O binary
>>> -LDFLAGS_vmlinux :=
>>> +ifeq ($(CONFIG_RELOCATABLE),y)
>>> +    LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
>>> +    KBUILD_CFLAGS += -fPIE
>>> +endif
>>>   ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
>>> -    LDFLAGS_vmlinux := --no-relax
>>> +    LDFLAGS_vmlinux += --no-relax
>>>       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>>>       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>>>   endif
>>> diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
>>> index 5104f3a871e3..862a8c09723c 100644
>>> --- a/arch/riscv/kernel/vmlinux.lds.S
>>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>>> @@ -133,6 +133,12 @@ SECTIONS
>>>
>>>       BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
>>>
>>> +    .rela.dyn : ALIGN(8) {
>>> +        __rela_dyn_start = .;
>>> +        *(.rela .rela*)
>>> +        __rela_dyn_end = .;
>>> +    }
>>> +
>>>   #ifdef CONFIG_EFI
>>>       . = ALIGN(PECOFF_SECTION_ALIGNMENT);
>>>       __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
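A quick aside on why the Kconfig help above says a relocation pass is needed
even when the kernel runs at its link address: with the RELA format, the value
to install is carried in each entry's r_addend field rather than read back
from the patched word, so the fixup code recomputes and writes every word
covered by .rela.dyn unconditionally; that is the overhead mentioned in the
commit message even when the relocation offset is zero. As a minimal, generic
sketch only (a userspace-style illustration, not the kernel implementation),
applying a single R_RISCV_RELATIVE entry amounts to:

#include <elf.h>
#include <stdint.h>

/*
 * Generic illustration only: apply one R_RISCV_RELATIVE entry.  "bias" is
 * the difference between the address the image runs at and the address it
 * was linked at.  The value is recomputed from r_addend, so the store
 * happens even when bias == 0.
 */
static void apply_one_rela(const Elf64_Rela *rela, uintptr_t bias)
{
	if (ELF64_R_TYPE(rela->r_info) == R_RISCV_RELATIVE) {
		uint64_t *where = (uint64_t *)(rela->r_offset + bias);

		*where = rela->r_addend + bias;
	}
}

The kernel version further down has to do the same thing with the MMU still
off, which is where the extra physical-address arithmetic in relocate_kernel()
comes from.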
>>> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
>>> index 7ebaef10ea1b..2d33ec574bbb 100644
>>> --- a/arch/riscv/mm/Makefile
>>> +++ b/arch/riscv/mm/Makefile
>>> @@ -1,6 +1,10 @@
>>>   # SPDX-License-Identifier: GPL-2.0-only
>>>
>>>   CFLAGS_init.o := -mcmodel=medany
>>> +ifdef CONFIG_RELOCATABLE
>>> +CFLAGS_init.o += -fno-pie
>>> +endif
>>> +
>>>   ifdef CONFIG_FTRACE
>>>   CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
>>>   CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
>>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>>> index c0cddf0fc22d..42041c12d496 100644
>>> --- a/arch/riscv/mm/init.c
>>> +++ b/arch/riscv/mm/init.c
>>> @@ -20,6 +20,9 @@
>>>   #include <linux/dma-map-ops.h>
>>>   #include <linux/crash_dump.h>
>>>   #include <linux/hugetlb.h>
>>> +#ifdef CONFIG_RELOCATABLE
>>> +#include <linux/elf.h>
>>> +#endif
>>>
>>>   #include <asm/fixmap.h>
>>>   #include <asm/tlbflush.h>
>>> @@ -103,7 +106,7 @@ static void __init print_vm_layout(void)
>>>       print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
>>>             (unsigned long)high_memory);
>>>   #ifdef CONFIG_64BIT
>>> -    print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
>>> +    print_mlm("kernel", (unsigned long)kernel_map.virt_addr,
>>>             (unsigned long)ADDRESS_SPACE_END);
>>>   #endif
>>>   }
>>> @@ -518,6 +521,44 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
>>>   #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
>>>   #endif
>>>
>>> +#ifdef CONFIG_RELOCATABLE
>>> +extern unsigned long __rela_dyn_start, __rela_dyn_end;
>>> +
>>> +static void __init relocate_kernel(void)
>>> +{
>>> +    Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
>>> +    /*
>>> +     * This holds the offset between the linked virtual address and the
>>> +     * relocated virtual address.
>>> +     */
>>> +    uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
>>> +    /*
>>> +     * This holds the offset between kernel linked virtual address and
>>> +     * physical address.
>>> +     */
>>> +    uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
>>> +
>>> +    for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
>>> +        Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
>>> +        Elf64_Addr relocated_addr = rela->r_addend;
>>> +
>>> +        if (rela->r_info != R_RISCV_RELATIVE)
>>> +            continue;
>>> +
>>> +        /*
>>> +         * Make sure to not relocate vdso symbols like rt_sigreturn
>>> +         * which are linked from the address 0 in vmlinux since
>>> +         * vdso symbol addresses are actually used as an offset from
>>> +         * mm->context.vdso in VDSO_OFFSET macro.
>>> +         */
>>> +        if (relocated_addr >= KERNEL_LINK_ADDR)
>>> +            relocated_addr += reloc_offset;
>>> +
>>> +        *(Elf64_Addr *)addr = relocated_addr;
>>> +    }
>>> +}
>>> +#endif /* CONFIG_RELOCATABLE */
>>> +
>>>   #ifdef CONFIG_XIP_KERNEL
>>>   static void __init create_kernel_page_table(pgd_t *pgdir,
>>>                           __always_unused bool early)
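To make the two offsets in relocate_kernel() concrete, here is a worked
example with purely illustrative numbers (they are not taken from the patch):

/*
 * Illustrative numbers only: say the kernel was linked at
 * 0xffffffff80000000, loaded at physical 0x80200000, and relocated to
 * virtual 0xffffffffc0000000, and consider an entry with
 * r_offset = 0xffffffff80001000.  Then:
 *
 *   va_kernel_link_pa_offset = 0xffffffff80000000 - 0x80200000
 *                            = 0xfffffffeffe00000
 *   addr                     = 0xffffffff80001000 - 0xfffffffeffe00000
 *                            = 0x80201000  (physical, usable with MMU off)
 *   reloc_offset             = 0xffffffffc0000000 - 0xffffffff80000000
 *                            = 0x40000000
 *
 * so the word at physical 0x80201000 is written with r_addend + 0x40000000
 * (provided r_addend points into the kernel rather than the vdso range
 * linked at 0).
 */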
>>> @@ -625,6 +666,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>>>       BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
>>>   #endif
>>>
>>> +#ifdef CONFIG_RELOCATABLE
>>> +    /*
>>> +     * Early page table uses only one PGDIR, which makes it possible
>>> +     * to map PGDIR_SIZE aligned on PGDIR_SIZE: if the relocation offset
>>> +     * makes the kernel cross over a PGDIR_SIZE boundary, raise a bug
>>> +     * since a part of the kernel would not get mapped.
>>> +     */
>>> +    BUG_ON(PGDIR_SIZE - (kernel_map.virt_addr & (PGDIR_SIZE - 1)) < kernel_map.size);
>>> +    relocate_kernel();
>>> +#endif
>>> +
>>>       pt_ops.alloc_pte = alloc_pte_early;
>>>       pt_ops.get_pte_virt = get_pte_virt_early;
>>>   #ifndef __PAGETABLE_PMD_FOLDED
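One last note on the BUG_ON guarding the relocation in setup_vm() above: the
early page table maps the kernel through a single PGD entry, so the relocated
image has to fit below the next PGDIR_SIZE boundary (1 GiB with Sv39). A
quick illustrative check (numbers are made up, not from the patch):

/*
 * Sv39, PGDIR_SIZE = 1 GiB: if the kernel image is 32 MiB and the
 * relocation leaves kernel_map.virt_addr only 20 MiB below the next
 * 1 GiB boundary, then
 *
 *   PGDIR_SIZE - (kernel_map.virt_addr & (PGDIR_SIZE - 1)) = 20 MiB < 32 MiB
 *
 * and the last 12 MiB of the kernel would land in the next, unmapped
 * PGD entry, so the BUG_ON() fires instead of booting a partially
 * mapped kernel.
 */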