From: Manali Shukla <[email protected]>
Execution of the HLT instruction by a vCPU can be intercepted by the
hypervisor by setting the HLT-Intercept Bit in VMCB, thus resulting in
a VMEXIT. It can be possible that soon after the VMEXIT, hypervisor
observes that there are pending V_INTR and V_NMI events for the vCPU and
causes it to perform a VMRUN to service those events. In that case
VMEXIT is wasteful.
The Idle HLT intercept feature allows for the HLT instruction execution
by a vCPU to be intercepted by hypervisor only if there are no pending
V_INTR and V_NMI events for the vCPU. The Idle HLT intercept will not be
triggerred, when vCPU is expected to have pending events (V_INTR and
V_NMI).
The feature allows the hypervisor to determine whether vCPU is idle and
reduces wasteful VMEXITs.
Details about the Idle HLT intercept feature can be found in AMD APM [1].
[1]: AMD64 Architecture Programmer's Manual Pub. 24593, April
2024, Vol 2, 15.9 Instruction Intercepts (Table 15-7: IDLE_HLT).
https://bugzilla.kernel.org/attachment.cgi?id=306250
Signed-off-by: Manali Shukla <[email protected]>
---
arch/x86/include/asm/svm.h | 1 +
arch/x86/include/uapi/asm/svm.h | 2 ++
arch/x86/kvm/svm/svm.c | 11 ++++++++---
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 728c98175b9c..3a91928a4060 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -116,6 +116,7 @@ enum {
INTERCEPT_INVPCID,
INTERCEPT_MCOMMIT,
INTERCEPT_TLBSYNC,
+ INTERCEPT_IDLE_HLT = 166,
};
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 80e1df482337..9910f86a2cef 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
@@ -223,6 +224,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0f3b59da0d4a..223c670bf986 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1289,8 +1289,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_MWAIT);
}
- if (!kvm_hlt_in_guest(vcpu->kvm))
- svm_set_intercept(svm, INTERCEPT_HLT);
+ if (!kvm_hlt_in_guest(vcpu->kvm)) {
+ if (cpu_feature_enabled(X86_FEATURE_IDLE_HLT))
+ svm_set_intercept(svm, INTERCEPT_IDLE_HLT);
+ else
+ svm_set_intercept(svm, INTERCEPT_HLT);
+ }
control->iopm_base_pa = __sme_set(iopm_base);
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
@@ -3291,6 +3295,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu
*vcpu) = {
[SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
[SVM_EXIT_CR8_WRITE_TRAP] = cr_trap,
[SVM_EXIT_INVPCID] = invpcid_interception,
+ [SVM_EXIT_IDLE_HLT] = kvm_emulate_halt,
[SVM_EXIT_NPF] = npf_interception,
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] =
avic_incomplete_ipi_interception,
@@ -3453,7 +3458,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64
exit_code)
return interrupt_window_interception(vcpu);
else if (exit_code == SVM_EXIT_INTR)
return intr_interception(vcpu);
- else if (exit_code == SVM_EXIT_HLT)
+ else if (exit_code == SVM_EXIT_HLT || exit_code == SVM_EXIT_IDLE_HLT)
return kvm_emulate_halt(vcpu);
else if (exit_code == SVM_EXIT_NPF)
return npf_interception(vcpu);
--
2.34.1