On 16.11.20 14:04, Peter Zijlstra wrote:
> On Mon, Nov 16, 2020 at 12:56:32PM +0100, Jürgen Groß wrote:
>>> static inline notrace unsigned long arch_local_save_flags(void)
>>> {
>>>	PVOP_CALL_ARGS;
>>>	PVOP_TEST_NULL(irq.save_fl);
>>>	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
>>>					"PUSHF; POP _ASM_AX",
>>>					X86_FEATURE_NATIVE)
>>
>> I am wondering whether we really want a new feature (basically "not
>> XENPV"). We could use ~X86_FEATURE_XENPV and teach apply_alternatives()
>> to understand negated features (yes, this limits the number of features
>> to 32767, but I don't think this is a real problem for quite some
>> time).
>>
>> Thoughts?
>
> I went with the simple thing for now... If we ever want/need another
> negative alternative I suppose we can do as you suggest...
>
> I was still poking at objtool to actually dtrt though..
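To make the negated-feature idea a bit more concrete: apply_alternatives()
could reserve the top bit of the u16 feature field to mean "patch when the
feature is NOT set". Untested sketch, all names (ALT_FLAG_NOT, ALT_NOT)
invented here:

#define ALT_FLAG_NOT	(1 << 15)
#define ALT_NOT(feature)	((feature) | ALT_FLAG_NOT)

	/* inside the alt_instr loop of apply_alternatives(): */
	u16 feature   = a->cpuid & ~ALT_FLAG_NOT;
	bool inverted = a->cpuid & ALT_FLAG_NOT;

	/*
	 * Leave the original instruction in place unless the
	 * (possibly negated) feature condition holds.
	 */
	if (!!boot_cpu_has(feature) == inverted)
		continue;

That would avoid burning a synthetic cap bit for every inverted condition,
at the cost of halving the usable feature space.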
I'd like to include this part in my series (with a different solution
for the restore_fl part, as suggested by Andy before).

Peter, are you fine with me taking your patch and adding your SoB?


Juergen
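PS: the different restore_fl solution mentioned above would go in the
direction of dropping the irq.restore_fl pvop altogether and composing
the restore from the remaining primitives. Rough sketch from memory of
Andy's suggestion, untested:

static inline notrace void arch_local_irq_restore(unsigned long f)
{
	/* IF is the only flag worth restoring here */
	if (f & X86_EFLAGS_IF)
		arch_local_irq_enable();
	else
		arch_local_irq_disable();
}

With that there is nothing left to patch for restore_fl at all.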
---
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dad350d42ecf..cc88f358bac5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -237,6 +237,7 @@
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL	( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_SEV_ES		( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_NOT_XENPV		( 8*32+21) /* "" inverse of X86_FEATURE_XENPV */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d25cc6830e89..e2a9d3e6b7ad 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -645,22 +645,56 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
 #ifdef CONFIG_PARAVIRT_XXL
 static inline notrace unsigned long arch_local_save_flags(void)
 {
-	return PVOP_CALLEE0(unsigned long, irq.save_fl);
+	PVOP_CALL_ARGS;
+	PVOP_TEST_NULL(irq.save_fl);
+	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+					"pushf; pop %%" _ASM_AX ";",
+					X86_FEATURE_NOT_XENPV)
+			    : PVOP_CALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+			    : paravirt_type(irq.save_fl.func),
+			      paravirt_clobber(CLBR_RET_REG)
+			    : "memory", "cc");
+	return __eax;
 }
 
 static inline notrace void arch_local_irq_restore(unsigned long f)
 {
-	PVOP_VCALLEE1(irq.restore_fl, f);
+	PVOP_CALL_ARGS;
+	PVOP_TEST_NULL(irq.restore_fl);
+	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+					"push %%" _ASM_ARG1 "; popf;",
+					X86_FEATURE_NOT_XENPV)
+			    : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+			    : paravirt_type(irq.restore_fl.func),
+			      paravirt_clobber(CLBR_RET_REG),
+			      PVOP_CALL_ARG1(f)
+			    : "memory", "cc");
 }
 
 static inline notrace void arch_local_irq_disable(void)
 {
-	PVOP_VCALLEE0(irq.irq_disable);
+	PVOP_CALL_ARGS;
+	PVOP_TEST_NULL(irq.irq_disable);
+	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+					"cli;",
+					X86_FEATURE_NOT_XENPV)
+			    : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+			    : paravirt_type(irq.irq_disable.func),
+			      paravirt_clobber(CLBR_RET_REG)
+			    : "memory", "cc");
 }
 
 static inline notrace void arch_local_irq_enable(void)
 {
-	PVOP_VCALLEE0(irq.irq_enable);
+	PVOP_CALL_ARGS;
+	PVOP_TEST_NULL(irq.irq_enable);
+	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+					"sti;",
+					X86_FEATURE_NOT_XENPV)
+			    : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+			    : paravirt_type(irq.irq_enable.func),
+			      paravirt_clobber(CLBR_RET_REG)
+			    : "memory", "cc");
 }
 
 static inline notrace unsigned long arch_local_irq_save(void)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2400ad62f330..5bd8f5503652 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -723,6 +723,19 @@ void __init alternative_instructions(void)
 	 * patching.
 	 */
 
+	if (!boot_cpu_has(X86_FEATURE_XENPV))
+		setup_force_cpu_cap(X86_FEATURE_NOT_XENPV);
+
+	/*
+	 * First patch paravirt functions, such that we overwrite the indirect
+	 * call with the direct call.
+	 */
+	apply_paravirt(__parainstructions, __parainstructions_end);
+
+	/*
+	 * Then patch alternatives, such that those paravirt calls that are in
+	 * alternatives can be overwritten by their immediate fragments.
+	 */
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 #ifdef CONFIG_SMP
@@ -741,8 +754,6 @@ void __init alternative_instructions(void)
 	}
 #endif
 
-	apply_paravirt(__parainstructions, __parainstructions_end);
-
 	restart_nmi();
 	alternatives_patched = 1;
 }
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index ace6e334cb39..867498db53ad 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -33,13 +33,9 @@ struct patch_xxl {
 };
 
 static const struct patch_xxl patch_data_xxl = {
-	.irq_irq_disable	= { 0xfa },		// cli
-	.irq_irq_enable		= { 0xfb },		// sti
-	.irq_save_fl		= { 0x9c, 0x58 },	// pushf; pop %[re]ax
 	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
 	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
 	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
-	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
 	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
 	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8, 0x48, 0x0f, 0x07 },
 						// swapgs; sysretq
@@ -76,11 +72,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
 
 	switch (type) {
 #ifdef CONFIG_PARAVIRT_XXL
-	PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
-	PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
-	PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
-	PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
-
 	PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
 	PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
 	PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
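PPS: to spell out the ordering dependency the new comments in
alternative_instructions() describe, the life of e.g. the save_fl call
site is roughly the following (illustrative only, instruction sequences
simplified):

	/*
	 * As emitted by the compiler; the site is covered by both a
	 * .parainstructions entry and a .altinstructions entry:
	 */
	call	*pv_ops.irq.save_fl.func

	/* step 1, apply_paravirt(): indirect call -> direct call */
	call	native_save_fl

	/*
	 * step 2, apply_alternatives(), with X86_FEATURE_NOT_XENPV
	 * forced on everything but Xen PV:
	 */
	pushf; pop %rax

Doing it the other way around would have apply_paravirt() run last and
overwrite the just-inlined native sequence with a call again.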