From: Sean Christopherson <sea...@google.com> Sent: Wednesday, February 26, 
2025 6:19 PM
> 
> Add a "tsc_properties" set of flags and use it to annotate whether the
> TSC operates at a known and/or reliable frequency when registering a
> paravirtual TSC calibration routine.  Currently, each PV flow manually
> sets the associated feature flags, but often in haphazard fashion that
> makes it difficult for unfamiliar readers to see the properties of the
> TSC when running under a particular hypervisor.
> 
> The other, bigger issue with manually setting the feature flags is that
> it decouples the flags from the calibration routine.  E.g. in theory, PV
> code could mark the TSC as having a known frequency, but then have its
> PV calibration discarded in favor of a method that doesn't use that known
> frequency.  Passing the TSC properties along with the calibration routine
> will allow adding sanity checks to guard against replacing a "better"
> calibration routine with a "worse" routine.
> 
> As a bonus, the flags also give developers working on new PV code a heads
> up that they should at least mark the TSC as having a known frequency.
> 
> Signed-off-by: Sean Christopherson <sea...@google.com>
> ---
>  arch/x86/coco/sev/core.c       |  6 ++----
>  arch/x86/coco/tdx/tdx.c        |  7 ++-----
>  arch/x86/include/asm/tsc.h     |  8 +++++++-
>  arch/x86/kernel/cpu/acrn.c     |  4 ++--
>  arch/x86/kernel/cpu/mshyperv.c | 10 +++++++---
>  arch/x86/kernel/cpu/vmware.c   |  7 ++++---
>  arch/x86/kernel/jailhouse.c    |  4 ++--
>  arch/x86/kernel/kvmclock.c     |  4 ++--
>  arch/x86/kernel/tsc.c          |  8 +++++++-
>  arch/x86/xen/time.c            |  4 ++--
>  10 files changed, 37 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
> index dab386f782ce..29dd50552715 100644
> --- a/arch/x86/coco/sev/core.c
> +++ b/arch/x86/coco/sev/core.c
> @@ -3284,12 +3284,10 @@ void __init snp_secure_tsc_init(void)
>  {
>       unsigned long long tsc_freq_mhz;
> 
> -     setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> -     setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> -
>       rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
>       snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);
> 
>       tsc_register_calibration_routines(securetsc_get_tsc_khz,
> -                                       securetsc_get_tsc_khz);
> +                                       securetsc_get_tsc_khz,
> +                                       TSC_FREQ_KNOWN_AND_RELIABLE);
>  }
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 42cdaa98dc5e..ca31560d0dd3 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -1135,14 +1135,11 @@ static unsigned long tdx_get_tsc_khz(void)
> 
>  void __init tdx_tsc_init(void)
>  {
> -     /* TSC is the only reliable clock in TDX guest */
> -     setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> -     setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> -
>       /*
>        * Override the PV calibration routines (if set) with more trustworthy
>        * CPUID-based calibration.  The TDX module emulates CPUID, whereas any
>        * PV information is provided by the hypervisor.
>        */
> -     tsc_register_calibration_routines(tdx_get_tsc_khz, NULL);
> +     tsc_register_calibration_routines(tdx_get_tsc_khz, NULL,
> +                                       TSC_FREQ_KNOWN_AND_RELIABLE);
>  }
> diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
> index 9318c74e8d13..360f47610258 100644
> --- a/arch/x86/include/asm/tsc.h
> +++ b/arch/x86/include/asm/tsc.h
> @@ -41,8 +41,14 @@ extern int cpuid_get_cpu_freq(unsigned int *cpu_khz);
>  extern void tsc_early_init(void);
>  extern void tsc_init(void);
>  #if defined(CONFIG_HYPERVISOR_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT)
> +enum tsc_properties {
> +     TSC_FREQUENCY_KNOWN     = BIT(0),
> +     TSC_RELIABLE            = BIT(1),
> +     TSC_FREQ_KNOWN_AND_RELIABLE = TSC_FREQUENCY_KNOWN | TSC_RELIABLE,
> +};
>  extern void tsc_register_calibration_routines(unsigned long 
> (*calibrate_tsc)(void),
> -                                           unsigned long 
> (*calibrate_cpu)(void));
> +                                           unsigned long 
> (*calibrate_cpu)(void),
> +                                           enum tsc_properties properties);
>  #endif
>  extern void mark_tsc_unstable(char *reason);
>  extern int unsynchronized_tsc(void);
> diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
> index 2da3de4d470e..4f2f4f7ec334 100644
> --- a/arch/x86/kernel/cpu/acrn.c
> +++ b/arch/x86/kernel/cpu/acrn.c
> @@ -29,9 +29,9 @@ static void __init acrn_init_platform(void)
>       /* Install system interrupt handler for ACRN hypervisor callback */
>       sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
> 
> -     setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
>       tsc_register_calibration_routines(acrn_get_tsc_khz,
> -                                       acrn_get_tsc_khz);
> +                                       acrn_get_tsc_khz,
> +                                       TSC_FREQUENCY_KNOWN);
>  }
> 
>  static bool acrn_x2apic_available(void)
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 174f6a71c899..445ac3adfebc 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -421,8 +421,13 @@ static void __init ms_hyperv_init_platform(void)
> 
>       if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
>           ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
> -             tsc_register_calibration_routines(hv_get_tsc_khz, 
> hv_get_tsc_khz);
> -             setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +             enum tsc_properties tsc_properties = TSC_FREQUENCY_KNOWN;
> +
> +             if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)
> +                     tsc_properties = TSC_FREQ_KNOWN_AND_RELIABLE;
> +
> +             tsc_register_calibration_routines(hv_get_tsc_khz, 
> hv_get_tsc_khz,
> +                                               tsc_properties);
>       }

For the Hyper-V guest code,

Reviewed-by: Michael Kelley <mhkli...@outlook.com>
Tested-by: Michael Kelley <mhkli...@outlook.com>

> 
>       if (ms_hyperv.priv_high & HV_ISOLATION) {
> @@ -525,7 +530,6 @@ static void __init ms_hyperv_init_platform(void)
>                * is called.
>                */
>               wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL,
> HV_EXPOSE_INVARIANT_TSC);
> -             setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
>       }
> 
>       /*
> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
> index 399cf3286a60..a3a71309214c 100644
> --- a/arch/x86/kernel/cpu/vmware.c
> +++ b/arch/x86/kernel/cpu/vmware.c
> @@ -385,10 +385,10 @@ static void __init vmware_paravirt_ops_setup(void)
>   */
>  static void __init vmware_set_capabilities(void)
>  {
> +     /* TSC is non-stop and reliable even if the frequency isn't known. */
>       setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
>       setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> -     if (vmware_tsc_khz)
> -             setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +
>       if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
>               setup_force_cpu_cap(X86_FEATURE_VMCALL);
>       else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
> @@ -417,7 +417,8 @@ static void __init vmware_platform_setup(void)
> 
>               vmware_tsc_khz = tsc_khz;
>               tsc_register_calibration_routines(vmware_get_tsc_khz,
> -                                               vmware_get_tsc_khz);
> +                                               vmware_get_tsc_khz,
> +                                               TSC_FREQ_KNOWN_AND_RELIABLE);
> 
>  #ifdef CONFIG_X86_LOCAL_APIC
>               /* Skip lapic calibration since we know the bus frequency. */
> diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
> index b0a053692161..d73a4d0fb118 100644
> --- a/arch/x86/kernel/jailhouse.c
> +++ b/arch/x86/kernel/jailhouse.c
> @@ -218,7 +218,8 @@ static void __init jailhouse_init_platform(void)
> 
>       machine_ops.emergency_restart           = jailhouse_no_restart;
> 
> -     tsc_register_calibration_routines(jailhouse_get_tsc, jailhouse_get_tsc);
> +     tsc_register_calibration_routines(jailhouse_get_tsc, jailhouse_get_tsc,
> +                                       TSC_FREQUENCY_KNOWN);
> 
>       while (pa_data) {
>               mapping = early_memremap(pa_data, sizeof(header));
> @@ -256,7 +257,6 @@ static void __init jailhouse_init_platform(void)
>       pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport);
> 
>       precalibrated_tsc_khz = setup_data.v1.tsc_khz;
> -     setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> 
>       pci_probe = 0;
> 
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index 1dbe12ecb26e..ce676e735ced 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -199,7 +199,6 @@ void kvmclock_cpu_action(enum kvm_guest_cpu_action action)
>   */
>  static unsigned long kvm_get_tsc_khz(void)
>  {
> -     setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
>       return pvclock_tsc_khz(this_cpu_pvti());
>  }
> 
> @@ -403,7 +402,8 @@ void __init kvmclock_init(void)
> 
>       kvm_sched_clock_init(stable);
> 
> -     tsc_register_calibration_routines(kvm_get_tsc_khz, kvm_get_tsc_khz);
> +     tsc_register_calibration_routines(kvm_get_tsc_khz, kvm_get_tsc_khz,
> +                                       TSC_FREQUENCY_KNOWN);
> 
>       x86_platform.get_wallclock = kvm_get_wallclock;
>       x86_platform.set_wallclock = kvm_set_wallclock;
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 5501d76243c8..be58df4fef66 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -1301,11 +1301,17 @@ static void __init check_system_tsc_reliable(void)
>   */
>  #if defined(CONFIG_HYPERVISOR_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT)
>  void tsc_register_calibration_routines(unsigned long (*calibrate_tsc)(void),
> -                                    unsigned long (*calibrate_cpu)(void))
> +                                    unsigned long (*calibrate_cpu)(void),
> +                                    enum tsc_properties properties)
>  {
>       if (WARN_ON_ONCE(!calibrate_tsc))
>               return;
> 
> +     if (properties & TSC_FREQUENCY_KNOWN)
> +             setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
> +     if (properties & TSC_RELIABLE)
> +             setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> +
>       x86_platform.calibrate_tsc = calibrate_tsc;
>       if (calibrate_cpu)
>               x86_platform.calibrate_cpu = calibrate_cpu;
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 13e5888c4501..4de06ea55397 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -40,7 +40,6 @@ static unsigned long xen_tsc_khz(void)
>       struct pvclock_vcpu_time_info *info =
>               &HYPERVISOR_shared_info->vcpu_info[0].time;
> 
> -     setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
>       return pvclock_tsc_khz(info);
>  }
> 
> @@ -571,7 +570,8 @@ static void __init xen_init_time_common(void)
>        */
>       paravirt_set_sched_clock(xen_sched_clock, NULL, NULL);
> 
> -     tsc_register_calibration_routines(xen_tsc_khz, NULL);
> +     tsc_register_calibration_routines(xen_tsc_khz, NULL,
> +                                       TSC_FREQUENCY_KNOWN);
>       x86_platform.get_wallclock = xen_get_wallclock;
>  }
> 
> --
> 2.48.1.711.g2feabab25a-goog
> 


Reply via email to