-------- Original Message --------
Subject:        [KVM TSC emulation 9/9] Add software TSC emulation
Date:   Mon, 20 Jun 2011 16:59:37 -0700
From:   Zachary Amsden <[email protected]>
To: Avi Kivity <[email protected]>, Marcelo Tosatti <[email protected]>, Glauber Costa <[email protected]>, Frank Arnold <[email protected]>, Joerg Roedel <[email protected]>, Jan Kiszka <[email protected]>, [email protected], [email protected], Zachary Amsden <[email protected]>, Avi Kivity <[email protected]>, Marcelo Tosatti <[email protected]>, Glauber Costa <[email protected]>, Frank Arnold <[email protected]>, Joerg Roedel <[email protected]>, Jan Kiszka <[email protected]>, [email protected]
CC: Zachary Amsden <[email protected]>, Zachary Amsden <[email protected]>



When hardware assistance for scaling the TSC is unavailable, or the TSC
cannot be kept in sync, add a software virtualization mode in which
guest reads of the TSC are trapped and emulated, which guarantees
perfect synchronization.

Currently this behavior defaults to on; how and when the decision to
use trapping is made is likely to be a matter of debate.  For now,
just make it possible.

Signed-off-by: Zachary Amsden <[email protected]>
---
 arch/x86/kvm/svm.c |   26 +++++++++++++++++++++++++-
 arch/x86/kvm/vmx.c |   28 +++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c |   34 +++++++++++++++++++++++-----------
 arch/x86/kvm/x86.h |    5 +++++
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dcab00e..fc4583d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -185,6 +185,7 @@ module_param(nested, int, S_IRUGO);

 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
+static void svm_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap);

 static int nested_svm_exit_handled(struct vcpu_svm *svm);
 static int nested_svm_intercept(struct vcpu_svm *svm);
@@ -912,13 +913,18 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
        u64 khz;

        /* Guest TSC same frequency as host TSC? */
-       if (!scale) {
+       if (!scale && !check_tsc_unstable()) {
                svm->tsc_ratio = TSC_RATIO_DEFAULT;
                return;
        }

        /* TSC scaling supported? */
        if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+               if (kvm_software_tsc) {
+                       pr_debug("kvm: using TSC trapping\n");
+                       svm_set_tsc_trapping(vcpu, true);
+                       return;
+               }
                if (user_tsc_khz > tsc_khz) {
                        vcpu->arch.tsc_catchup = 1;
                        vcpu->arch.tsc_always_catchup = 1;
@@ -1184,6 +1190,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
        svm->asid_generation = 0;
        init_vmcb(svm);
+       kvm_set_tsc_khz(&svm->vcpu, kvm_max_tsc_khz);
        kvm_write_tsc(&svm->vcpu, 0);

        err = fx_init(&svm->vcpu);
@@ -1303,6 +1310,15 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
        clr_intercept(svm, INTERCEPT_VINTR);
 }

+static void svm_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       if (trap)
+               set_intercept(svm, INTERCEPT_RDTSC);
+       else
+               clr_intercept(svm, INTERCEPT_RDTSC);
+}
+
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
 {
        struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -2732,6 +2748,13 @@ static int task_switch_interception(struct vcpu_svm *svm)
        return 1;
 }

+static int rdtsc_interception(struct vcpu_svm *svm)
+{
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
+       kvm_read_tsc(&svm->vcpu);
+       return 1;
+}
+
 static int cpuid_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
@@ -3178,6 +3201,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_SMI]                          = nop_on_interception,
        [SVM_EXIT_INIT]                         = nop_on_interception,
        [SVM_EXIT_VINTR]                        = interrupt_window_interception,
+       [SVM_EXIT_RDTSC]                        = rdtsc_interception,
        [SVM_EXIT_CPUID]                        = cpuid_interception,
        [SVM_EXIT_IRET]                         = iret_interception,
        [SVM_EXIT_INVD]                         = emulate_on_interception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 780fe12..65066b4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -606,6 +606,7 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
+static void vmx_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap);

 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1756,9 +1757,14 @@ static u64 guest_read_tsc(void)
  */
 static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 {
-       if (!scale)
+       if (!scale && !check_tsc_unstable())
                return;

+       if (kvm_software_tsc) {
+               pr_debug("kvm: using TSC trapping\n");
+               vmx_set_tsc_trapping(vcpu, true);
+               return;
+       }
        if (user_tsc_khz > tsc_khz) {
                vcpu->arch.tsc_catchup = 1;
                vcpu->arch.tsc_always_catchup = 1;
@@ -3695,6 +3701,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
        set_cr4_guest_host_mask(vmx);

+       kvm_set_tsc_khz(&vmx->vcpu, kvm_max_tsc_khz);
        kvm_write_tsc(&vmx->vcpu, 0);

        return 0;
@@ -3997,6 +4004,18 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
        return 0;
 }

+static void vmx_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap)
+{
+       u32 cpu_based_vm_exec_control;
+
+       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       if (trap)
+               cpu_based_vm_exec_control |= CPU_BASED_RDTSC_EXITING;
+       else
+               cpu_based_vm_exec_control &= ~CPU_BASED_RDTSC_EXITING;
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
                                  int vec, u32 err_code)
 {
@@ -4497,6 +4516,12 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
        return 1;
 }

+static int handle_rdtsc(struct kvm_vcpu *vcpu)
+{
+       kvm_read_tsc(vcpu);
+       return 1;
+}
+
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
        skip_emulated_instruction(vcpu);
@@ -5421,6 +5446,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_HLT]                     = handle_halt,
        [EXIT_REASON_INVD]                    = handle_invd,
        [EXIT_REASON_INVLPG]                  = handle_invlpg,
+       [EXIT_REASON_RDTSC]                   = handle_rdtsc,
        [EXIT_REASON_VMCALL]                  = handle_vmcall,
        [EXIT_REASON_VMCLEAR]                 = handle_vmclear,
        [EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 09e67fb..1a07796 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -99,6 +99,10 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
 static u32 tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

+int kvm_software_tsc = 1;
+module_param_named(software_tsc_emulation, kvm_software_tsc, bool, 0644);
+EXPORT_SYMBOL_GPL(kvm_software_tsc);
+
 #define KVM_NR_SHARED_MSRS 16

 struct kvm_shared_msrs_global {
@@ -993,7 +997,8 @@ static inline u64 get_kernel_ns(void)
 }

 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
-unsigned long max_tsc_khz;
+unsigned long kvm_max_tsc_khz;
+EXPORT_SYMBOL_GPL(kvm_max_tsc_khz);

 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
@@ -1001,7 +1006,7 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
                                   vcpu->arch.virtual_tsc_shift);
 }

-static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
+void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 {
        u32 thresh_lo, thresh_hi;
        int use_scaling = 0;
@@ -1026,6 +1031,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
        }
        kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);   
 }
+EXPORT_SYMBOL_GPL(kvm_set_tsc_khz);

 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 {
@@ -1117,6 +1123,18 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)

 EXPORT_SYMBOL_GPL(kvm_write_tsc);

+void kvm_read_tsc(struct kvm_vcpu *vcpu)
+{
+       u64 tsc;
+       s64 kernel_ns = get_kernel_ns();
+
+       tsc = compute_guest_tsc(vcpu, kernel_ns);
+       kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)tsc);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, tsc >> 32);
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_read_tsc);
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
        unsigned long flags;
@@ -4931,7 +4949,7 @@ static void kvm_timer_init(void)
 {
        int cpu;

-       max_tsc_khz = tsc_khz;
+       kvm_max_tsc_khz = tsc_khz;
        register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_CPU_FREQ
@@ -4940,13 +4958,13 @@ static void kvm_timer_init(void)
                cpu = get_cpu();
                cpufreq_get_policy(&policy, cpu);
                if (policy.cpuinfo.max_freq)
-                       max_tsc_khz = policy.cpuinfo.max_freq;
+                       kvm_max_tsc_khz = policy.cpuinfo.max_freq;
                put_cpu();
 #endif
                cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                          CPUFREQ_TRANSITION_NOTIFIER);
        }
-       pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
+       pr_debug("kvm: max_tsc_khz = %ld\n", kvm_max_tsc_khz);
        for_each_online_cpu(cpu)
                smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
@@ -6194,10 +6212,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                                unsigned int id)
 {
-       if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
-               printk_once(KERN_WARNING
-               "kvm: SMP vm created on host with unstable TSC; "
-               "guest TSC will not be reliable\n");
        return kvm_x86_ops->vcpu_create(kvm, id);
 }

@@ -6385,8 +6399,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        }
        vcpu->arch.pio_data = page_address(page);

-       kvm_set_tsc_khz(vcpu, max_tsc_khz);
-
        r = kvm_mmu_create(vcpu);
        if (r < 0)
                goto fail_free_pio_data;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 256da82..94780df 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -80,6 +80,10 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);

 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
+void kvm_read_tsc(struct kvm_vcpu *vcpu);
+void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz);
+extern int kvm_software_tsc;
+extern unsigned long kvm_max_tsc_khz;

 int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
        gva_t addr, void *val, unsigned int bytes,
@@ -89,4 +93,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
        gva_t addr, void *val, unsigned int bytes,
        struct x86_exception *exception);

+
 #endif
--
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to