Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> --- CC: Jan Beulich <jbeul...@suse.com> CC: Roger Pau Monné <roger....@citrix.com>
This is on top of the FRED series for the wrmsrns() cleanup, but otherwise unrelated. The code generation isn't entirely ideal Function old new delta init_fred 255 274 +19 vmx_set_reg 248 256 +8 enter_state_helper.cold 1014 1018 +4 __start_xen 8893 8897 +4 but made worse by the the prior codegen for wrmsrns(MSR_STAR, ...) being mad: mov $0xc0000081,%ecx mov $0xe023e008,%edx movabs $0xe023e00800000000,%rax cs wrmsr The two sources of code expansion come from the compiler not being able to construct %eax and %edx separately, and not being able propagate constants. Loading 0 is possibly common enough to warrant another specialisation where we can use "a" (0), "d" (0) and forgo the MOV+SHR. I'm probably overthinking things. The addition will be in the noise in practice, and Intel are sure the advantage of MSR_IMM will not be. --- xen/arch/x86/include/asm/alternative.h | 7 ++++ xen/arch/x86/include/asm/msr.h | 39 ++++++++++++++++++++- xen/include/public/arch-x86/cpufeatureset.h | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h index 0482bbf7cbf1..fe87b15ec72c 100644 --- a/xen/arch/x86/include/asm/alternative.h +++ b/xen/arch/x86/include/asm/alternative.h @@ -151,6 +151,13 @@ extern void alternative_instructions(void); ALTERNATIVE(oldinstr, newinstr, feature) \ :: input ) +#define alternative_input_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2, input...) \ + asm_inline volatile ( \ + ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2) \ + :: input ) + /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ asm_inline volatile ( \ diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h index 01f510315ffe..434fcac854e1 100644 --- a/xen/arch/x86/include/asm/msr.h +++ b/xen/arch/x86/include/asm/msr.h @@ -38,9 +38,46 @@ static inline void wrmsrl(unsigned int msr, uint64_t val) wrmsr(msr, lo, hi); } +/* + * Non-serialising WRMSR with a compile-time constant index, when available. + * Falls back to plain WRMSRNS, or to a serialising WRMSR. + */ +static always_inline void __wrmsrns_imm(uint32_t msr, uint64_t val) +{ + /* + * For best performance, WRMSRNS %r64, $msr is recommended. For + * compatibility, we need to fall back to plain WRMSRNS, or to WRMSR. + * + * The combined ABI is awkward, because WRMSRNS $imm takes a single r64, + * whereas WRMSR{,NS} takes a split edx:eax pair. + * + * Always use WRMSRNS %rax, $imm, because it has the most in common with + * the legacy forms. When MSR_IMM isn't available, emit setup logic for + * %ecx and %edx too. + */ + alternative_input_2( + "mov $%c[msr], %%ecx\n\t" + "mov %%rax, %%rdx\n\t" + "shr $32, %%rdx\n\t" + ".byte 0x2e; wrmsr", + + /* WRMSRNS %rax, $msr */ + ".byte 0xc4,0xe7,0x7a,0xf6,0xc0; .long %c[msr]", X86_FEATURE_MSR_IMM, + + "mov $%c[msr], %%ecx\n\t" + "mov %%rax, %%rdx\n\t" + "shr $32, %%rdx\n\t" + ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS, + + [msr] "i" (msr), "a" (val) : "rcx", "rdx"); +} + /* Non-serialising WRMSR, when available. Falls back to a serialising WRMSR. */ -static inline void wrmsrns(uint32_t msr, uint64_t val) +static always_inline void wrmsrns(uint32_t msr, uint64_t val) { + if ( __builtin_constant_p(msr) ) + return __wrmsrns_imm(msr, val); + /* * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant CS * prefix to avoid a trailing NOP. diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 9cd778586f10..af69cf3822eb 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -352,6 +352,7 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ XEN_CPUFEATURE(UC_LOCK_DIS, 13*32+ 6) /* UC-lock disable */ /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ +XEN_CPUFEATURE(MSR_IMM, 14*32+ 5) /* {RD,WR}MSR $imm32 */ /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */ XEN_CPUFEATURE(AVX_VNNI_INT8, 15*32+ 4) /*A AVX-VNNI-INT8 Instructions */ -- 2.39.5