Refactor the existing assembly bits into smaller helper functions and also abstract L1D_FLUSH into a helper function. Use these functions in kvm for L1D flushing.
Reviewed-by: Kees Cook <keesc...@chromium.org> Signed-off-by: Balbir Singh <sbl...@amazon.com> --- arch/x86/include/asm/cacheflush.h | 3 ++ arch/x86/kernel/l1d_flush.c | 54 +++++++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.c | 29 ++--------------- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index bac56fcd9790..21cc3b28fa63 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -8,7 +8,10 @@ #define L1D_CACHE_ORDER 4 void clflush_cache_range(void *addr, unsigned int size); +void l1d_flush_populate_tlb(void *l1d_flush_pages); void *l1d_flush_alloc_pages(void); void l1d_flush_cleanup_pages(void *l1d_flush_pages); +void l1d_flush_sw(void *l1d_flush_pages); +int l1d_flush_hw(void); #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/kernel/l1d_flush.c b/arch/x86/kernel/l1d_flush.c index d605878c8f28..5871794f890d 100644 --- a/arch/x86/kernel/l1d_flush.c +++ b/arch/x86/kernel/l1d_flush.c @@ -34,3 +34,57 @@ void l1d_flush_cleanup_pages(void *l1d_flush_pages) free_pages((unsigned long)l1d_flush_pages, L1D_CACHE_ORDER); } EXPORT_SYMBOL_GPL(l1d_flush_cleanup_pages); + +/* + * Not all users of l1d flush would want to populate the TLB first + * split out the function so that callers can optionally flush the L1D + * cache via sw without prefetching the TLB. + */ +void l1d_flush_populate_tlb(void *l1d_flush_pages) +{ + int size = PAGE_SIZE << L1D_CACHE_ORDER; + + asm volatile( + /* First ensure the pages are in the TLB */ + "xorl %%eax, %%eax\n" + ".Lpopulate_tlb:\n\t" + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" + "addl $4096, %%eax\n\t" + "cmpl %%eax, %[size]\n\t" + "jne .Lpopulate_tlb\n\t" + "xorl %%eax, %%eax\n\t" + "cpuid\n\t" + :: [flush_pages] "r" (l1d_flush_pages), + [size] "r" (size) + : "eax", "ebx", "ecx", "edx"); +} +EXPORT_SYMBOL_GPL(l1d_flush_populate_tlb); + +int l1d_flush_hw(void) +{ + if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { + wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); + return 0; + } + return -ENOTSUPP; +} +EXPORT_SYMBOL_GPL(l1d_flush_hw); + +void l1d_flush_sw(void *l1d_flush_pages) +{ + int size = PAGE_SIZE << L1D_CACHE_ORDER; + + asm volatile( + /* Fill the cache */ + "xorl %%eax, %%eax\n" + ".Lfill_cache:\n" + "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" + "addl $64, %%eax\n\t" + "cmpl %%eax, %[size]\n\t" + "jne .Lfill_cache\n\t" + "lfence\n" + :: [flush_pages] "r" (l1d_flush_pages), + [size] "r" (size) + : "eax", "ecx"); +} +EXPORT_SYMBOL_GPL(l1d_flush_sw); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f35654db904a..4f95927aad4c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6031,8 +6031,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, */ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) { - int size = PAGE_SIZE << L1D_CACHE_ORDER; - /* * This code is only executed when the the flush mode is 'cond' or * 'always' @@ -6061,32 +6059,11 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) vcpu->stat.l1d_flush++; - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); + if (!l1d_flush_hw()) return; - } - asm volatile( - /* First ensure the pages are in the TLB */ - "xorl %%eax, %%eax\n" - ".Lpopulate_tlb:\n\t" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $4096, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lpopulate_tlb\n\t" - "xorl %%eax, %%eax\n\t" - "cpuid\n\t" - /* Now fill the cache */ - "xorl %%eax, %%eax\n" - ".Lfill_cache:\n" - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" - "addl $64, %%eax\n\t" - "cmpl %%eax, %[size]\n\t" - "jne .Lfill_cache\n\t" - "lfence\n" - :: [flush_pages] "r" (vmx_l1d_flush_pages), - [size] "r" (size) - : "eax", "ebx", "ecx", "edx"); + l1d_flush_populate_tlb(vmx_l1d_flush_pages); + l1d_flush_sw(vmx_l1d_flush_pages); } static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) -- 2.17.1