Under FRED, there's one entrypoint from Ring 3, and one from Ring 0. FRED gives us a good stack (even for SYSCALL/SYSENTER), and a unified event frame on the stack, meaing that all software needs to do is spill the GPRs with a line of PUSHes. Introduce PUSH_AND_CLEAR_GPRS and POP_GPRS for this purpose.
Introduce entry_FRED_R0() which to a first appoximation is complete for all event handling within Xen. entry_FRED_R0() needs deriving from entry_FRED_R3(), so introduce a basic handler. There is more work required to make the return-to-guest path work under FRED, so leave a BUG clearly in place. Also introduce entry_from_{xen,pv}() to be the C level handlers. By simply copying regs->fred_ss.vector into regs->entry_vector, we can reuse all the existing fault handlers. Extend fatal_trap() to render the event type, including by name, when FRED is active. This is slightly complicated, because X86_ET_OTHER must not use vector_name() or SYSCALL and SYSENTER get rendered as #BP and #DB. Also, {read,write}_gs_shadow() needs modifying to avoid the SWAPGS instruction, which is disallowed in FRED mode. This is sufficient to handle all interrupts and exceptions encountered during development, including plenty of Double Faults. Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> --- CC: Jan Beulich <jbeul...@suse.com> CC: Roger Pau Monné <roger....@citrix.com> v2: * Don't render a vector name for X86_ET_SW_INT * Fix typos in names[] * Link entry-fred.o first SIMICS hasn't been updated to the FRED v9, and still wants ENDBR instructions at the entrypoints. --- xen/arch/x86/include/asm/asm_defns.h | 65 +++++++++++ xen/arch/x86/traps.c | 159 ++++++++++++++++++++++++++- xen/arch/x86/x86_64/Makefile | 1 + xen/arch/x86/x86_64/entry-fred.S | 33 ++++++ 4 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 xen/arch/x86/x86_64/entry-fred.S diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h index 72a0082d319d..a81a4043d0f1 100644 --- a/xen/arch/x86/include/asm/asm_defns.h +++ b/xen/arch/x86/include/asm/asm_defns.h @@ -315,6 +315,71 @@ static always_inline void stac(void) subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp .endm +/* + * Push and clear GPRs + */ +.macro PUSH_AND_CLEAR_GPRS + push %rdi + xor %edi, %edi + push %rsi + xor %esi, %esi + push %rdx + xor %edx, %edx + push %rcx + xor %ecx, %ecx + push %rax + xor %eax, %eax + push %r8 + xor %r8d, %r8d + push %r9 + xor %r9d, %r9d + push %r10 + xor %r10d, %r10d + push %r11 + xor %r11d, %r11d + push %rbx + xor %ebx, %ebx + push %rbp +#ifdef CONFIG_FRAME_POINTER +/* Indicate special exception stack frame by inverting the frame pointer. */ + mov %rsp, %rbp + notq %rbp +#else + xor %ebp, %ebp +#endif + push %r12 + xor %r12d, %r12d + push %r13 + xor %r13d, %r13d + push %r14 + xor %r14d, %r14d + push %r15 + xor %r15d, %r15d +.endm + +/* + * POP GPRs from a UREGS_* frame on the stack. Does not modify flags. + * + * @rax: Alternative destination for the %rax value on the stack. + */ +.macro POP_GPRS rax=%rax + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop \rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi +.endm + #ifdef CONFIG_PV32 #define CR4_PV32_RESTORE \ ALTERNATIVE_2 "", \ diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 66308e7c9edf..67763bec0dc5 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -89,6 +89,13 @@ const unsigned int nmi_cpu; #define stack_words_per_line 4 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)(regs)->rsp) +/* Only valid to use when FRED is active. */ +static inline struct fred_info *cpu_regs_fred_info(struct cpu_user_regs *regs) +{ + ASSERT(read_cr4() & X86_CR4_FRED); + return (void *)(regs + 1); +} + struct extra_state { unsigned long cr0, cr2, cr3, cr4; @@ -1023,6 +1030,32 @@ void show_execution_state_nmi(const cpumask_t *mask, bool show_all) printk("Non-responding CPUs: {%*pbl}\n", CPUMASK_PR(&show_state_mask)); } +static const char *x86_et_name(unsigned int type) +{ + static const char *const names[] = { + [X86_ET_EXT_INTR] = "EXT_INTR", + [X86_ET_NMI] = "NMI", + [X86_ET_HW_EXC] = "HW_EXC", + [X86_ET_SW_INT] = "SW_INT", + [X86_ET_PRIV_SW_EXC] = "PRIV_SW_EXC", + [X86_ET_SW_EXC] = "SW_EXC", + [X86_ET_OTHER] = "OTHER", + }; + + return (type < ARRAY_SIZE(names) && names[type]) ? names[type] : "???"; +} + +static const char *x86_et_other_name(unsigned int what) +{ + static const char *const names[] = { + [0] = "MTF", + [1] = "SYSCALL", + [2] = "SYSENTER", + }; + + return (what < ARRAY_SIZE(names) && names[what]) ? names[what] : "???"; +} + const char *vector_name(unsigned int vec) { static const char names[][4] = { @@ -1101,9 +1134,41 @@ void fatal_trap(const struct cpu_user_regs *regs, bool show_remote) } } - panic("FATAL TRAP: vec %u, %s[%04x]%s\n", - trapnr, vector_name(trapnr), regs->error_code, - (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); + if ( read_cr4() & X86_CR4_FRED ) + { + bool render_ec = false; + const char *vec_name = NULL; + + switch ( regs->fred_ss.type ) + { + case X86_ET_HW_EXC: + case X86_ET_PRIV_SW_EXC: + case X86_ET_SW_EXC: + render_ec = true; + vec_name = vector_name(regs->fred_ss.vector); + break; + + case X86_ET_OTHER: + vec_name = x86_et_other_name(regs->fred_ss.vector); + break; + } + + if ( render_ec ) + panic("Fatal TRAP: type %u, %s, vec %u, %s[%04x]%s\n", + regs->fred_ss.type, x86_et_name(regs->fred_ss.type), + regs->fred_ss.vector, vec_name ?: "", + regs->error_code, + (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); + else + panic("Fatal TRAP: type %u, %s, vec %u, %s%s\n", + regs->fred_ss.type, x86_et_name(regs->fred_ss.type), + regs->fred_ss.vector, vec_name ?: "", + (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); + } + else + panic("FATAL TRAP: vec %u, %s[%04x]%s\n", + trapnr, vector_name(trapnr), regs->error_code, + (regs->eflags & X86_EFLAGS_IF) ? "" : " IN INTERRUPT CONTEXT"); } void asmlinkage noreturn do_unhandled_trap(struct cpu_user_regs *regs) @@ -2198,6 +2263,94 @@ void asmlinkage check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit) } #endif +void asmlinkage entry_from_pv(struct cpu_user_regs *regs) +{ + /* Copy fred_ss.vector into entry_vector as IDT delivery would have done. */ + regs->entry_vector = regs->fred_ss.vector; + + fatal_trap(regs, false); +} + +void asmlinkage entry_from_xen(struct cpu_user_regs *regs) +{ + struct fred_info *fi = cpu_regs_fred_info(regs); + uint8_t type = regs->fred_ss.type; + + /* Copy fred_ss.vector into entry_vector as IDT delivery would have done. */ + regs->entry_vector = regs->fred_ss.vector; + + /* + * First, handle the asynchronous or fatal events. These are either + * unrelated to the interrupted context, or may not have valid context + * recorded, and all have special rules on how/whether to re-enable IRQs. + */ + switch ( type ) + { + case X86_ET_EXT_INTR: + return do_IRQ(regs); + + case X86_ET_NMI: + return do_nmi(regs); + + case X86_ET_HW_EXC: + switch ( regs->fred_ss.vector ) + { + case X86_EXC_DF: return do_double_fault(regs); + case X86_EXC_MC: return do_machine_check(regs); + } + break; + } + + /* + * With the asynchronous events handled, what remains are the synchronous + * ones. If we interrupted an IRQs-on region, we should re-enable IRQs + * now; for #PF and #DB, %cr2 and %dr6 are on the stack in edata. + */ + if ( regs->eflags & X86_EFLAGS_IF ) + local_irq_enable(); + + switch ( type ) + { + case X86_ET_HW_EXC: + case X86_ET_PRIV_SW_EXC: + case X86_ET_SW_EXC: + switch ( regs->fred_ss.vector ) + { + case X86_EXC_PF: handle_PF(regs, fi->edata); break; + case X86_EXC_GP: do_general_protection(regs); break; + case X86_EXC_UD: do_invalid_op(regs); break; + case X86_EXC_NM: do_device_not_available(regs); break; + case X86_EXC_BP: do_int3(regs); break; + case X86_EXC_DB: handle_DB(regs, fi->edata); break; + + case X86_EXC_DE: + case X86_EXC_OF: + case X86_EXC_BR: + case X86_EXC_NP: + case X86_EXC_SS: + case X86_EXC_MF: + case X86_EXC_AC: + case X86_EXC_XM: + do_trap(regs); + break; + + case X86_EXC_CP: do_entry_CP(regs); break; + + default: + goto fatal; + } + break; + + default: + goto fatal; + } + + return; + + fatal: + fatal_trap(regs, false); +} + /* * Local variables: * mode: C diff --git a/xen/arch/x86/x86_64/Makefile b/xen/arch/x86/x86_64/Makefile index f20763088740..c0a0b6603221 100644 --- a/xen/arch/x86/x86_64/Makefile +++ b/xen/arch/x86/x86_64/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_PV32) += compat/ +obj-bin-y += entry-fred.o obj-bin-y += entry.o obj-$(CONFIG_KEXEC) += machine_kexec.o obj-y += pci.o diff --git a/xen/arch/x86/x86_64/entry-fred.S b/xen/arch/x86/x86_64/entry-fred.S new file mode 100644 index 000000000000..3c3320df22cb --- /dev/null +++ b/xen/arch/x86/x86_64/entry-fred.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + + .file "x86_64/entry-fred.S" + +#include <asm/asm_defns.h> +#include <asm/page.h> + + .section .text.entry, "ax", @progbits + + /* The Ring3 entry point is required to be 4k aligned. */ + +FUNC(entry_FRED_R3, 4096) + PUSH_AND_CLEAR_GPRS + + mov %rsp, %rdi + call entry_from_pv + + POP_GPRS + eretu +END(entry_FRED_R3) + + /* The Ring0 entrypoint is at Ring3 + 0x100. */ + .org entry_FRED_R3 + 0x100, 0xcc + +FUNC_LOCAL(entry_FRED_R0, 0) + PUSH_AND_CLEAR_GPRS + + mov %rsp, %rdi + call entry_from_xen + + POP_GPRS + erets +END(entry_FRED_R0) -- 2.39.5