On Wed, Nov 02, 2022 at 11:18:35PM +0000, Sean Christopherson wrote:
> Now that kvm_arch_hardware_setup() is called immediately after
> kvm_arch_init(), fold the guts of kvm_arch_hardware_(un)setup() into
> kvm_arch_{init,exit}() as a step towards dropping one of the hooks.
>
> To avoid having to unwind various setup, e.g. registration of several
> notifiers, slot in the vendor hardware setup before the registration of
> said notifiers and callbacks.  Introducing a functional change while
> moving code is less than ideal, but the alternative is adding a pile of
> unwinding code, which is much more error prone, e.g. several attempts to
> move the setup code verbatim all introduced bugs.
>
> Add a comment to document that kvm_ops_update() is effectively the point
> of no return, e.g. it sets the kvm_x86_ops.hardware_enable canary and so
> needs to be unwound.
>
> Signed-off-by: Sean Christopherson <sea...@google.com>
> ---
>  arch/x86/kvm/x86.c | 121 +++++++++++++++++++++++----------------------
>  1 file changed, 63 insertions(+), 58 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9a7702b1c563..80ee580a9cd4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9252,6 +9252,24 @@ static struct notifier_block pvclock_gtod_notifier = {
>  };
>  #endif
>
> +static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
> +{
> +     memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
> +
> +#define __KVM_X86_OP(func) \
> +     static_call_update(kvm_x86_##func, kvm_x86_ops.func);
> +#define KVM_X86_OP(func) \
> +     WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
> +#define KVM_X86_OP_OPTIONAL __KVM_X86_OP
> +#define KVM_X86_OP_OPTIONAL_RET0(func) \
> +     static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
> +                                        (void *)__static_call_return0);
> +#include <asm/kvm-x86-ops.h>
> +#undef __KVM_X86_OP
> +
> +     kvm_pmu_ops_update(ops->pmu_ops);
> +}
> +
>  int kvm_arch_init(void *opaque)
>  {
>       struct kvm_x86_init_ops *ops = opaque;
> @@ -9325,6 +9343,24 @@ int kvm_arch_init(void *opaque)
>               kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
>       }
>
> +     rdmsrl_safe(MSR_EFER, &host_efer);
> +
> +     if (boot_cpu_has(X86_FEATURE_XSAVES))
> +             rdmsrl(MSR_IA32_XSS, host_xss);
> +
> +     kvm_init_pmu_capability();
> +
> +     r = ops->hardware_setup();
> +     if (r != 0)
> +             goto out_mmu_exit;

Before this patch, a failure from ops->hardware_setup() was unwound
by kvm_arch_exit(); do we need to keep that old behavior?
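
To make the question concrete, below is a minimal sketch of the
pre-patch flow as I read it; example_kvm_init(), the label name, and
the elided setup steps are illustrative only, not the actual
virt/kvm/kvm_main.c code:

/*
 * Illustrative sketch: before this patch, a failure from
 * kvm_arch_hardware_setup() fell through to kvm_arch_exit(), which
 * tore down everything kvm_arch_init() had set up.
 */
static int example_kvm_init(void *opaque)
{
	int r;

	r = kvm_arch_init(opaque);
	if (r)
		return r;

	r = kvm_arch_hardware_setup(opaque);
	if (r < 0)
		goto out_arch_exit;

	return 0;

out_arch_exit:
	kvm_arch_exit();
	return r;
}

With this patch, the ops->hardware_setup() failure instead unwinds
through the new out_mmu_exit label inside kvm_arch_init().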

> +
> +     /*
> +      * Point of no return!  DO NOT add error paths below this point unless
> +      * absolutely necessary, as most operations from this point forward
> +      * require unwinding.
> +      */
> +     kvm_ops_update(ops);
> +
>       kvm_timer_init();
>
>       if (pi_inject_timer == -1)
> @@ -9336,8 +9372,32 @@ int kvm_arch_init(void *opaque)
>               set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
>  #endif
>
> +     kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
> +
> +     if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> +             kvm_caps.supported_xss = 0;
> +
> +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
> +     cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
> +#undef __kvm_cpu_cap_has
> +
> +     if (kvm_caps.has_tsc_control) {
> +             /*
> +              * Make sure the user can only configure tsc_khz values that
> +              * fit into a signed integer.
> +              * A min value is not calculated because it will always
> +              * be 1 on all machines.
> +              */
> +             u64 max = min(0x7fffffffULL,
> +                           __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
> +             kvm_caps.max_guest_tsc_khz = max;
> +     }
> +     kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
> +     kvm_init_msr_list();
>       return 0;
>
> +out_mmu_exit:
> +     kvm_mmu_vendor_module_exit();
>  out_free_percpu:
>       free_percpu(user_return_msrs);
>  out_free_x86_emulator_cache:
> @@ -9347,6 +9407,8 @@ int kvm_arch_init(void *opaque)
>
>  void kvm_arch_exit(void)
>  {
> +     kvm_unregister_perf_callbacks();
> +
>  #ifdef CONFIG_X86_64
>       if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
>               clear_hv_tscchange_cb();
> @@ -9362,6 +9424,7 @@ void kvm_arch_exit(void)
>       irq_work_sync(&pvclock_irq_work);
>       cancel_work_sync(&pvclock_gtod_work);
>  #endif
> +     static_call(kvm_x86_hardware_unsetup)();
>       kvm_x86_ops.hardware_enable = NULL;
>       kvm_mmu_vendor_module_exit();
>       free_percpu(user_return_msrs);
> @@ -11922,72 +11985,14 @@ void kvm_arch_hardware_disable(void)
>       drop_user_return_notifiers();
>  }
>
> -static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
> -{
> -     memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
> -
> -#define __KVM_X86_OP(func) \
> -     static_call_update(kvm_x86_##func, kvm_x86_ops.func);
> -#define KVM_X86_OP(func) \
> -     WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
> -#define KVM_X86_OP_OPTIONAL __KVM_X86_OP
> -#define KVM_X86_OP_OPTIONAL_RET0(func) \
> -     static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
> -                                        (void *)__static_call_return0);
> -#include <asm/kvm-x86-ops.h>
> -#undef __KVM_X86_OP
> -
> -     kvm_pmu_ops_update(ops->pmu_ops);
> -}
> -
>  int kvm_arch_hardware_setup(void *opaque)
>  {
> -     struct kvm_x86_init_ops *ops = opaque;
> -     int r;
> -
> -     rdmsrl_safe(MSR_EFER, &host_efer);
> -
> -     if (boot_cpu_has(X86_FEATURE_XSAVES))
> -             rdmsrl(MSR_IA32_XSS, host_xss);
> -
> -     kvm_init_pmu_capability();
> -
> -     r = ops->hardware_setup();
> -     if (r != 0)
> -             return r;
> -
> -     kvm_ops_update(ops);
> -
> -     kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
> -
> -     if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> -             kvm_caps.supported_xss = 0;
> -
> -#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
> -     cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
> -#undef __kvm_cpu_cap_has
> -
> -     if (kvm_caps.has_tsc_control) {
> -             /*
> -              * Make sure the user can only configure tsc_khz values that
> -              * fit into a signed integer.
> -              * A min value is not calculated because it will always
> -              * be 1 on all machines.
> -              */
> -             u64 max = min(0x7fffffffULL,
> -                           __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
> -             kvm_caps.max_guest_tsc_khz = max;
> -     }
> -     kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
> -     kvm_init_msr_list();
>       return 0;
>  }
>
>  void kvm_arch_hardware_unsetup(void)
>  {
> -     kvm_unregister_perf_callbacks();
>
> -     static_call(kvm_x86_hardware_unsetup)();
>  }
>
>  int kvm_arch_check_processor_compat(void *opaque)
> --
> 2.38.1.431.g37b22c650d-goog
>
