From: David Woodhouse <[email protected]>

When a masterclock update is triggered (e.g. by the clocksource change
notifier), KVM_REQ_MASTERCLOCK_UPDATE is set on all vCPUs. Currently,
each vCPU processes the request independently and redundantly
re-executes the entire pvclock_update_vm_gtod_copy() sequence,
serialized only by tsc_write_lock. Each redundant re-snapshot of the
master clock reference point introduces potential clock drift.

Fix this by having __kvm_start_pvclock_update() check, after acquiring
the lock, whether the requesting vCPU's KVM_REQ_MASTERCLOCK_UPDATE is
still set. If another vCPU has already done the update and cleared it,
bail out. Otherwise, clear the request on all other vCPUs before
proceeding. The caller in vcpu_enter_guest() now uses the non-clearing
kvm_test_request(), since the request is cleared inside
__kvm_start_pvclock_update() under the lock.
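Purely as an illustration of the pattern (this is not KVM code; every
identifier in it is invented for the example), a minimal userspace
sketch in C: each "vCPU" thread tests its request bit without clearing
it, and the first thread to take the lock clears the bit on every vCPU
and performs the VM-wide update, so any later thread finds its own bit
already gone and bails out:

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdio.h>

  #define NVCPUS 4

  static pthread_mutex_t tsc_write_lock = PTHREAD_MUTEX_INITIALIZER;
  static atomic_int masterclock_req[NVCPUS]; /* per-vCPU request bit */
  static int updates_done;                   /* VM-wide snapshots done */

  static void update_masterclock(int me)
  {
          pthread_mutex_lock(&tsc_write_lock);

          /* Another thread already did the update and cleared our bit. */
          if (!atomic_load(&masterclock_req[me])) {
                  pthread_mutex_unlock(&tsc_write_lock);
                  return;
          }

          /* The update is VM-wide; clear the request on every vCPU. */
          for (int i = 0; i < NVCPUS; i++)
                  atomic_store(&masterclock_req[i], 0);

          updates_done++; /* stands in for pvclock_update_vm_gtod_copy() */
          pthread_mutex_unlock(&tsc_write_lock);
  }

  static void *vcpu_thread(void *arg)
  {
          int me = (int)(long)arg;

          /* Like kvm_test_request(): look at the bit, do not clear it. */
          if (atomic_load(&masterclock_req[me]))
                  update_masterclock(me);
          return NULL;
  }

  int main(void)
  {
          pthread_t t[NVCPUS];

          for (int i = 0; i < NVCPUS; i++)
                  atomic_store(&masterclock_req[i], 1);
          for (int i = 0; i < NVCPUS; i++)
                  pthread_create(&t[i], NULL, vcpu_thread, (void *)(long)i);
          for (int i = 0; i < NVCPUS; i++)
                  pthread_join(t[i], NULL);

          printf("snapshot ran %d time(s)\n", updates_done);
          return 0;
  }

However the threads interleave, the snapshot runs exactly once, which
is the property the patch establishes for pvclock_update_vm_gtod_copy().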
Suggested-by: Dongli Zhang <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
 arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7d9ec0638d28..77dfd4455a4e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3259,10 +3259,39 @@ static void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
-static void __kvm_start_pvclock_update(struct kvm *kvm)
+static void kvm_clear_mclock_inprogress_request(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+}
+
+static bool __kvm_start_pvclock_update(struct kvm *kvm, struct kvm_vcpu *requesting_vcpu)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
 	raw_spin_lock_irq(&kvm->arch.tsc_write_lock);
+
+	/*
+	 * If another vCPU already did the update while we were waiting
+	 * for the lock, our request will have been cleared. Bail out.
+	 */
+	if (requesting_vcpu &&
+	    !kvm_test_request(KVM_REQ_MASTERCLOCK_UPDATE, requesting_vcpu)) {
+		kvm_clear_mclock_inprogress_request(kvm);
+		raw_spin_unlock_irq(&kvm->arch.tsc_write_lock);
+		return false;
+	}
+
+	/* The update is VM-wide; prevent other vCPUs from redoing it. */
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_clear_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
 	write_seqcount_begin(&kvm->arch.pvclock_sc);
+	return true;
 }
 
 static void kvm_start_pvclock_update(struct kvm *kvm)
@@ -3270,7 +3299,7 @@ static void kvm_start_pvclock_update(struct kvm *kvm)
 	kvm_make_mclock_inprogress_request(kvm);
 
 	/* no guest entries from this point */
-	__kvm_start_pvclock_update(kvm);
+	__kvm_start_pvclock_update(kvm, NULL);
 }
 
 static void kvm_end_pvclock_update(struct kvm *kvm)
@@ -3279,22 +3308,25 @@ static void kvm_end_pvclock_update(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 
-	write_seqcount_end(&ka->pvclock_sc);
-	raw_spin_unlock_irq(&ka->tsc_write_lock);
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
 	/* guest entries allowed */
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+	kvm_clear_mclock_inprogress_request(kvm);
+
+	write_seqcount_end(&ka->pvclock_sc);
+	raw_spin_unlock_irq(&ka->tsc_write_lock);
 }
 
-static void kvm_update_masterclock(struct kvm *kvm)
+static void kvm_update_masterclock(struct kvm *kvm, struct kvm_vcpu *vcpu)
 {
 	kvm_hv_request_tsc_page_update(kvm);
-	kvm_start_pvclock_update(kvm);
-	pvclock_update_vm_gtod_copy(kvm);
-	kvm_end_pvclock_update(kvm);
+	kvm_make_mclock_inprogress_request(kvm);
+
+	if (__kvm_start_pvclock_update(kvm, vcpu)) {
+		pvclock_update_vm_gtod_copy(kvm);
+		kvm_end_pvclock_update(kvm);
+	}
 }
 
 /*
@@ -11485,8 +11517,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_mmu_free_obsolete_roots(vcpu);
 	if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 		__kvm_migrate_timers(vcpu);
-	if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
-		kvm_update_masterclock(vcpu->kvm);
+	if (kvm_test_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
+		kvm_update_masterclock(vcpu->kvm, vcpu);
 	if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
 		kvm_gen_kvmclock_update(vcpu);
 	if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
-- 
2.51.0

