On Fri, Nov 23, 2012 at 11:46:30AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-11-22:
> > On Wed, Nov 21, 2012 at 04:09:36PM +0800, Yang Zhang wrote:
> >> Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
> >> manually, which is fully taken care of by the hardware. This needs
> >> some special awareness into existing interrupr injection path:
> >> 
> >> - for pending interrupt, instead of direct injection, we may need
> >>   update architecture specific indicators before resuming to guest.
> >> - A pending interrupt, which is masked by ISR, should be also
> >>   considered in above update action, since hardware will decide
> >>   when to inject it at right time. Current has_interrupt and
> >>   get_interrupt only returns a valid vector from injection p.o.v.
> >> Signed-off-by: Yang Zhang <[email protected]>
> >> Signed-off-by: Kevin Tian <[email protected]>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h |    4 + arch/x86/include/asm/vmx.h   
> >>    |   11 ++++ arch/x86/kvm/irq.c              |   44 ++++++++++++++
> >>  arch/x86/kvm/lapic.c            |   44 +++++++++++++-
> >>  arch/x86/kvm/lapic.h            |   13 ++++ arch/x86/kvm/svm.c        
> >>       |    6 ++ arch/x86/kvm/vmx.c              |  125
> >>  ++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c            
> >>   |   16 +++++- virt/kvm/ioapic.c               |    1 + 9 files
> >>  changed, 260 insertions(+), 4 deletions(-)
> >> diff --git a/arch/x86/include/asm/kvm_host.h
> >> b/arch/x86/include/asm/kvm_host.h index b2e11f4..8e07a86 100644 ---
> >> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -682,6 +682,10 @@ struct kvm_x86_ops {
> >>    void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
> >>    void (*enable_irq_window)(struct kvm_vcpu *vcpu);
> >>    void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> >> +  int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
> >> +  void (*update_irq)(struct kvm_vcpu *vcpu);
> >> +  void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector,
> >> +                  int need_eoi, int global);
> >>    int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> >>    int (*get_tdp_level)(void);
> >>    u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> >> index 21101b6..1003341 100644
> >> --- a/arch/x86/include/asm/vmx.h
> >> +++ b/arch/x86/include/asm/vmx.h
> >> @@ -62,6 +62,7 @@
> >>  #define EXIT_REASON_MCE_DURING_VMENTRY  41 #define
> >>  EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS    
> >>      44 +#define EXIT_REASON_EOI_INDUCED         45 #define
> >>  EXIT_REASON_EPT_VIOLATION       48 #define EXIT_REASON_EPT_MISCONFIG  
> >>      49 #define EXIT_REASON_WBINVD              54 @@ -143,6 +144,7 @@
> >>  #define SECONDARY_EXEC_WBINVD_EXITING             0x00000040 #define
> >>  SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 #define
> >>  SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100 +#define
> >>  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200 #define
> >>  SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define
> >>  SECONDARY_EXEC_ENABLE_INVPCID             0x00001000
> >> @@ -180,6 +182,7 @@ enum vmcs_field {
> >>    GUEST_GS_SELECTOR               = 0x0000080a,   GUEST_LDTR_SELECTOR   
> >>           = 0x0000080c,    GUEST_TR_SELECTOR               = 0x0000080e,
> >>  + GUEST_INTR_STATUS               = 0x00000810,   HOST_ES_SELECTOR     
> >>            = 0x00000c00,   HOST_CS_SELECTOR                = 0x00000c02,
> >>    HOST_SS_SELECTOR                = 0x00000c04, @@ -207,6 +210,14 @@
> >>  enum vmcs_field {         APIC_ACCESS_ADDR_HIGH           = 0x00002015,   
> >> EPT_POINTER  
> >>                    = 0x0000201a,   EPT_POINTER_HIGH                =
> >>  0x0000201b,
> >> +  EOI_EXIT_BITMAP0                = 0x0000201c,
> >> +  EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
> >> +  EOI_EXIT_BITMAP1                = 0x0000201e,
> >> +  EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
> >> +  EOI_EXIT_BITMAP2                = 0x00002020,
> >> +  EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
> >> +  EOI_EXIT_BITMAP3                = 0x00002022,
> >> +  EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
> >>    GUEST_PHYSICAL_ADDRESS          = 0x00002400,
> >>    GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
> >>    VMCS_LINK_POINTER               = 0x00002800,
> >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> >> index 7e06ba1..c7356a3 100644
> >> --- a/arch/x86/kvm/irq.c
> >> +++ b/arch/x86/kvm/irq.c
> >> @@ -60,6 +60,29 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> >>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> >>  
> >>  /*
> >> + * check if there is pending interrupt without
> >> + * intack. This _apicv version is used when hardware
> >> + * supports APIC virtualization with virtual interrupt
> >> + * delivery support. In such case, KVM is not required
> >> + * to poll pending APIC interrupt, and thus this
> >> + * interface is used to poll pending interupts from
> >> + * non-APIC source.
> >> + */
> >> +int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >> +{
> >> +  struct kvm_pic *s;
> >> +
> >> +  if (!irqchip_in_kernel(v->kvm))
> >> +          return v->arch.interrupt.pending;
> >> +
> > This does not belong here. If !irqchip_in_kernel() the function will not
> > be called. Hmm actually with !irqchip_in_kernel() kernel will oops in
> > kvm_apic_vid_enabled() since it dereference vcpu->arch.apic without
> > checking if it is NULL.
> 
> Right. Will remove it in next version and add the check in 
> kvm_apic_vid_enabled.
>  
> > 
> >> +  if (kvm_apic_accept_pic_intr(v)) {
> >> +          s = pic_irqchip(v->kvm);        /* PIC */
> >> +          return s->output;
> >> +  } else
> >> +          return 0;
> > This is code duplication from kvm_cpu_has_interrupt(). Write common
> > function and call it from kvm_cpu_has_interrupt(), but even that is
> > not needed, see below.
> 
> Why it is not needed? 
Because it you change kvm_cpu_has_interrupt() like I described below the
code path that uses this function will not be needed.

>  
> >> +}
> >> +
> >> +/*
> >>   * Read pending interrupt vector and intack.
> >>   */
> >>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v) @@ -82,6 +105,27 @@ int
> >>  kvm_cpu_get_interrupt(struct kvm_vcpu *v) }
> >>  EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
> >> +/*
> >> + * Read pending interrupt vector and intack.
> >> + * Similar to kvm_cpu_has_interrupt_apicv, to get
> >> + * interrupts from non-APIC sources.
> >> + */
> >> +int kvm_cpu_get_extint(struct kvm_vcpu *v)
> >> +{
> >> +  struct kvm_pic *s;
> >> +  int vector = -1;
> >> +
> >> +  if (!irqchip_in_kernel(v->kvm))
> >> +          return v->arch.interrupt.nr;
> > Same as above.
> > 
> >> +
> >> +  if (kvm_apic_accept_pic_intr(v)) {
> >> +          s = pic_irqchip(v->kvm);
> >> +          s->output = 0;          /* PIC */
> >> +          vector = kvm_pic_read_irq(v->kvm);
> > Ditto about code duplication.
> > 
> >> +  }
> >> +  return vector;
> >> +}
> >> +
> >>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
> >>  {
> >>    kvm_inject_apic_timer_irqs(vcpu);
> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >> index a63ffdc..af48361 100644
> >> --- a/arch/x86/kvm/lapic.c
> >> +++ b/arch/x86/kvm/lapic.c
> >> @@ -643,6 +643,12 @@ out:
> >>    return ret;
> >>  }
> >> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> >> +          int need_eoi, int global)
> >> +{
> >> +  kvm_x86_ops->set_eoi_exitmap(vcpu, vector, need_eoi, global);
> >> +}
> >> +
> >>  /*
> >>   * Add a pending IRQ into lapic.
> >>   * Return 1 if successfully added and 0 if discarded.
> >> @@ -664,8 +670,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, 
> >> int
> > delivery_mode,
> >>            if (trig_mode) {
> >>                    apic_debug("level trig mode for vector %d", vector);
> >>                    apic_set_vector(vector, apic->regs + APIC_TMR);
> >> -          } else
> >> +                  kvm_set_eoi_exitmap(vcpu, vector, 1, 0);
> >> +          } else {
> >>                    apic_clear_vector(vector, apic->regs + APIC_TMR);
> >> +                  kvm_set_eoi_exitmap(vcpu, vector, 0, 0);
> > Why not use APIC_TMR directly instead of kvm_set_eoi_exitmap() logic?
> 
> Good idea. It seems more reasonable. 
> 
> >> +          }
> >> 
> >>            result = !apic_test_and_set_irr(vector, apic);
> >>            trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, @@ 
> >> -769,6
> >>  +778,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)        return
> >>  vector; }
> >> +/*
> >> + * this interface assumes a trap-like exit, which has already finished
> >> + * desired side effect including vISR and vPPR update.
> >> + */
> >> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
> >> +{
> >> +  struct kvm_lapic *apic = vcpu->arch.apic;
> >> +  int trigger_mode;
> >> +
> >> +  if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
> >> +          trigger_mode = IOAPIC_LEVEL_TRIG;
> >> +  else
> >> +          trigger_mode = IOAPIC_EDGE_TRIG;
> >> +
> >> +  if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
> >> +          kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> >> +  kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
> > More code duplication. Why not call apic_set_eoi() and skip isr/ppr
> > logic there if vid is enabled, or put the logic in common function and
> > call from both places.
> 
> Ok, will change it in next patch.
> 
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
> >> +
> >>  static void apic_send_ipi(struct kvm_lapic *apic) {       u32 icr_low =
> >>  kvm_apic_get_reg(apic, APIC_ICR); @@ -1510,6 +1539,8 @@ int
> >>  kvm_create_lapic(struct kvm_vcpu *vcpu)   kvm_lapic_reset(vcpu);
> >>    kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
> >> +  if (kvm_x86_ops->has_virtual_interrupt_delivery(vcpu))
> >> +          apic->vid_enabled = true;
> > What do you have vid_enabled for. This is global, not per apic, state.
> When inject interrupt to guest, we need this to check whether vid is enabled. 
> If not, use old way to handle the interrupt.
> I thing put it in apic is reasonable. Though all vcpu use same configuration, 
> APICv feature is per vcpu too.
> 
How APICv is per vcpu? It is global. Just call 
has_virtual_interrupt_delivery(vcpu)
instead of vid_enabled thing.

> > 
> >>    return 0; nomem_free_apic:      kfree(apic); @@ -1533,6 +1564,17 @@ int
> >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)     return highest_irr; }
> >> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu)
> >> +{
> >> +  struct kvm_lapic *apic = vcpu->arch.apic;
> >> +
> >> +  if (!apic || !apic_enabled(apic))
> >> +          return -1;
> >> +
> >> +  return apic_find_highest_irr(apic);
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_apic_get_highest_irr);
> >> +
> >>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> >>  {
> >>    u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >> index c42f111..2503a64 100644
> >> --- a/arch/x86/kvm/lapic.h
> >> +++ b/arch/x86/kvm/lapic.h
> >> @@ -20,6 +20,7 @@ struct kvm_lapic {
> >>    u32 divide_count;       struct kvm_vcpu *vcpu;  bool irr_pending; +     
> >> bool
> >>  vid_enabled;      /* Number of bits set in ISR. */        s16 isr_count;  
> >> /* The
> >>  highest vector set in ISR; if -1 - invalid, must scan ISR. */ @@ -39,6
> >>  +40,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> >>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
> >> +int kvm_cpu_has_extint(struct kvm_vcpu *v);
> >> +int kvm_cpu_get_extint(struct kvm_vcpu *v);
> >> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
> >>  void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64
> >>  kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void
> >>  kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -50,6
> >>  +54,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int
> >>  kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int
> >>  kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
> >> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> >> +          int need_eoi, int global);
> >>  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> >>  
> >>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> >> @@ -65,6 +71,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu
> > *vcpu);
> >>  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
> >>  
> >>  int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
> >> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
> >> 
> >>  void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
> >>  void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
> >> @@ -81,6 +88,12 @@ static inline bool
> > kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
> >>    return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
> >>  }
> >> +static inline bool kvm_apic_vid_enabled(struct kvm_vcpu *vcpu)
> >> +{
> >> +  struct kvm_lapic *apic = vcpu->arch.apic;
> >> +  return apic->vid_enabled;
> >> +}
> > vcpu->arch.apic can be NULL from where this is called.
> > 
> >> +
> >>  int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
> >>  void kvm_lapic_init(void);
> >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >> index d017df3..b290aba 100644
> >> --- a/arch/x86/kvm/svm.c
> >> +++ b/arch/x86/kvm/svm.c
> >> @@ -3564,6 +3564,11 @@ static void update_cr8_intercept(struct kvm_vcpu
> > *vcpu, int tpr, int irr)
> >>            set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
> >>  }
> >> +static int svm_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> >> +{
> >> +  return 0;
> >> +}
> >> +
> >>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) {       struct vcpu_svm
> >>  *svm = to_svm(vcpu); @@ -4283,6 +4288,7 @@ static struct kvm_x86_ops
> >>  svm_x86_ops = {   .enable_nmi_window = enable_nmi_window,
> >>    .enable_irq_window = enable_irq_window,         .update_cr8_intercept =
> >>  update_cr8_intercept,
> >> +  .has_virtual_interrupt_delivery = svm_has_virtual_interrupt_delivery,
> >> 
> >>    .set_tss_addr = svm_set_tss_addr,
> >>    .get_tdp_level = get_npt_level,
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index e9287aa..c0d74ce 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -86,6 +86,9 @@ module_param(fasteoi, bool, S_IRUGO);
> >>  static bool __read_mostly enable_apicv_reg = 0;
> >>  module_param(enable_apicv_reg, bool, S_IRUGO);
> >> +static bool __read_mostly enable_apicv_vid = 0;
> >> +module_param(enable_apicv_vid, bool, S_IRUGO);
> >> +
> >>  /*
> >>   * If nested=1, nested virtualization is supported, i.e., guests may use
> >>   * VMX and be a hypervisor for its own guests. If nested=0, guests may not
> >> @@ -432,6 +435,10 @@ struct vcpu_vmx {
> >> 
> >>    bool rdtscp_enabled;
> >> +  u8 eoi_exitmap_changed;
> >> +  u64 eoi_exit_bitmap[4];
> >> +  u64 eoi_exit_bitmap_global[4];
> >> +
> >>    /* Support for a guest hypervisor (nested VMX) */
> >>    struct nested_vmx nested;
> >>  };
> >> @@ -770,6 +777,12 @@ static inline bool
> > cpu_has_vmx_apic_register_virt(void)
> >>            SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >>  }
> >> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> >> +{
> >> +  return vmcs_config.cpu_based_2nd_exec_ctrl &
> >> +          SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >> +}
> >> +
> >>  static inline bool cpu_has_vmx_flexpriority(void)
> >>  {
> >>    return cpu_has_vmx_tpr_shadow() &&
> >> @@ -2480,7 +2493,8 @@ static __init int setup_vmcs_config(struct
> > vmcs_config *vmcs_conf)
> >>                    SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> >>                    SECONDARY_EXEC_RDTSCP |
> >>                    SECONDARY_EXEC_ENABLE_INVPCID |
> >> -                  SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> +                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
> >> +                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >>            if (adjust_vmx_controls(min2, opt2,
> >>                                    MSR_IA32_VMX_PROCBASED_CTLS2,
> >>                                    &_cpu_based_2nd_exec_control) < 0)
> >> @@ -2494,7 +2508,8 @@ static __init int setup_vmcs_config(struct
> >> vmcs_config *vmcs_conf)
> >> 
> >>    if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
> >>            _cpu_based_2nd_exec_control &= ~(
> >> -                          SECONDARY_EXEC_APIC_REGISTER_VIRT);
> >> +                          SECONDARY_EXEC_APIC_REGISTER_VIRT |
> >> +                          SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> >> 
> >>    if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {          
> >> /*
> >>  CR3 accesses and invlpg don't need to cause VM Exits when EPT @@
> >>  -2696,6 +2711,9 @@ static __init int hardware_setup(void)         if
> >>  (!cpu_has_vmx_apic_register_virt())               enable_apicv_reg = 0;
> >> +  if (!cpu_has_vmx_virtual_intr_delivery())
> >> +          enable_apicv_vid = 0;
> >> +
> >>    if (nested)
> >>            nested_vmx_setup_ctls_msrs();
> >> @@ -3811,6 +3829,8 @@ static u32 vmx_secondary_exec_control(struct
> > vcpu_vmx *vmx)
> >>            exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> >>    if (!enable_apicv_reg)
> >>            exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> +  if (!enable_apicv_vid)
> >> +          exec_control &= ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >>    return exec_control;
> >>  }
> >> @@ -3855,6 +3875,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> >>                            vmx_secondary_exec_control(vmx));
> >>    }
> >> +  if (enable_apicv_vid) {
> >> +          vmcs_write64(EOI_EXIT_BITMAP0, 0);
> >> +          vmcs_write64(EOI_EXIT_BITMAP1, 0);
> >> +          vmcs_write64(EOI_EXIT_BITMAP2, 0);
> >> +          vmcs_write64(EOI_EXIT_BITMAP3, 0);
> >> +
> >> +          vmcs_write16(GUEST_INTR_STATUS, 0);
> >> +  }
> >> +
> >>    if (ple_gap) {
> >>            vmcs_write32(PLE_GAP, ple_gap);
> >>            vmcs_write32(PLE_WINDOW, ple_window);
> >> @@ -4770,6 +4799,16 @@ static int handle_apic_access(struct kvm_vcpu
> > *vcpu)
> >>    return emulate_instruction(vcpu, 0) == EMULATE_DONE;
> >>  }
> >> +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
> >> +{
> >> +  unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >> +  int vector = exit_qualification & 0xff;
> >> +
> >> +  /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
> >> +  kvm_apic_set_eoi_accelerated(vcpu, vector);
> >> +  return 1;
> >> +}
> >> +
> >>  static int handle_apic_write(struct kvm_vcpu *vcpu)
> >>  {
> >>    unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >> @@ -5719,6 +5758,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct
> > kvm_vcpu *vcpu) = {
> >>    [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
> >>    [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
> >>    [EXIT_REASON_APIC_WRITE]              = handle_apic_write,
> >>  + [EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
> >>    [EXIT_REASON_WBINVD]                  = handle_wbinvd,
> >>    [EXIT_REASON_XSETBV]                  = handle_xsetbv,
> >>    [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
> >> @@ -6049,6 +6089,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> >> 
> >>  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
> >>  {
> >> +    /* no need for tpr_threshold update if APIC virtual
> >> +     * interrupt delivery is enabled */
> >> +  if (!enable_apicv_vid)
> >> +          return ;
> > 
> > Just set kvm_x86_ops->update_cr8_intercept to NULL if !enable_apicv_vid
> > and the function will not be called.
> 
> Sure.
>  
> >> +
> >>    if (irr == -1 || tpr < irr) {
> >>            vmcs_write32(TPR_THRESHOLD, 0);
> >>            return;
> >> @@ -6057,6 +6102,79 @@ static void update_cr8_intercept(struct kvm_vcpu
> > *vcpu, int tpr, int irr)
> >>    vmcs_write32(TPR_THRESHOLD, irr);
> >>  }
> >> +static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> >> +{
> >> +  return irqchip_in_kernel(vcpu->kvm) && enable_apicv_vid;
> >> +}
> >> +
> >> +static void vmx_update_irq(struct kvm_vcpu *vcpu)
> >> +{
> >> +  u16 status;
> >> +  u8 old;
> >> +  int vector;
> >> +  struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> +
> >> +  if (!enable_apicv_vid)
> >> +          return ;
> > Ditto. Set kvm_x86_ops->update_irq to a function that does nothing if
> > !enable_apicv_vid. BTW you do not set this callback in SVM code and call
> > it unconditionally.
> > 
> >> +
> >> +  vector = kvm_apic_get_highest_irr(vcpu);
> >> +  if (vector == -1)
> >> +          return;
> >> +
> >> +  status = vmcs_read16(GUEST_INTR_STATUS);
> >> +  old = (u8)status & 0xff;
> >> +  if ((u8)vector != old) {
> >> +          status &= ~0xff;
> >> +          status |= (u8)vector;
> >> +          vmcs_write16(GUEST_INTR_STATUS, status);
> >> +  }
> > Please write RVI assessor functions.
> Sure.
> 
> >> +
> >> +  if (vmx->eoi_exitmap_changed) {
> >> +#define UPDATE_EOI_EXITMAP(v, e) {                                \
> >> +  if ((v)->eoi_exitmap_changed & (1 << (e)))      \
> >> +          vmcs_write64(EOI_EXIT_BITMAP##e,                \
> >> +          (v)->eoi_exit_bitmap[e] | (v)->eoi_exit_bitmap_global[e]); }
> > Inline function would do. But why calculate this on each entry? We want
> > EOI exits only for level IOAPIC interrupts and edge IOAPIC interrupt
> > with registered notifiers. This configuration rarely changes.
> 
> eoi_exitmap_changed is used to track whether the trig mode is changed. As you 
> said, it changes rarely, so this codes seldom will be executed.
> 
But code still checks whether bitmap was changed during each interrupt
injection. Recalculate bitmap when notifier is added/removed or ioapic
configuration changes. Use request bit to reload new bitmap.

> > 
> > 
> >> +
> >> +          UPDATE_EOI_EXITMAP(vmx, 0);
> >> +          UPDATE_EOI_EXITMAP(vmx, 1);
> >> +          UPDATE_EOI_EXITMAP(vmx, 2);
> >> +          UPDATE_EOI_EXITMAP(vmx, 3);
> >> +          vmx->eoi_exitmap_changed = 0;
> >> +  }
> >> +}
> >> +
> >> +static void vmx_set_eoi_exitmap(struct kvm_vcpu *vcpu,
> >> +                          int vector,
> >> +                          int need_eoi, int global)
> >> +{
> >> +  struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> +  int index, offset, changed;
> >> +  unsigned long *eoi_exitmap;
> >> +
> >> +  if (!enable_apicv_vid)
> >> +          return ;
> >> +
> >> +  if (WARN_ONCE((vector < 0) || (vector > 255),
> >> +          "KVM VMX: vector (%d) out of range\n", vector))
> >> +          return;
> >> +
> >> +  index = vector >> 6;
> >> +  offset = vector & 63;
> >> +  if (global)
> >> +          eoi_exitmap =
> >> +              (unsigned long *)&vmx->eoi_exit_bitmap_global[index];
> >> +  else
> >> +          eoi_exitmap = (unsigned long *)&vmx->eoi_exit_bitmap[index];
> >> +
> >> +  if (need_eoi)
> >> +          changed = !test_and_set_bit(offset, eoi_exitmap);
> >> +  else
> >> +          changed = test_and_clear_bit(offset, eoi_exitmap);
> >> +
> >> +  if (changed)
> >> +          vmx->eoi_exitmap_changed |= 1 << index;
> >> +}
> >> +
> >>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) {      u32
> >>  exit_intr_info; @@ -7320,6 +7438,9 @@ static struct kvm_x86_ops
> >>  vmx_x86_ops = {   .enable_nmi_window = enable_nmi_window,
> >>    .enable_irq_window = enable_irq_window,         .update_cr8_intercept =
> >>  update_cr8_intercept,
> >> +  .has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
> >> +  .update_irq = vmx_update_irq,
> > You need to initialize this one in svm.c too.
> > 
> >> +  .set_eoi_exitmap = vmx_set_eoi_exitmap,
> >> 
> >>    .set_tss_addr = vmx_set_tss_addr,
> >>    .get_tdp_level = get_ept_level,
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 4f76417..8b8de3b 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -5190,6 +5190,13 @@ static void inject_pending_event(struct kvm_vcpu
> > *vcpu)
> >>                    vcpu->arch.nmi_injected = true;
> >>                    kvm_x86_ops->set_nmi(vcpu);
> >>            }
> >> +  } else if (kvm_apic_vid_enabled(vcpu)) {
> >> +          if (kvm_cpu_has_extint(vcpu) &&
> >> +              kvm_x86_ops->interrupt_allowed(vcpu)) {
> >> +                  kvm_queue_interrupt(vcpu,
> >> +                          kvm_cpu_get_extint(vcpu), false);
> >> +                  kvm_x86_ops->set_irq(vcpu);
> >> +          }
> > Drop all this and modify kvm_cpu_has_interrupt()/kvm_cpu_get_interrupt()
> > to consider apic interrupts only if vid is enabled then the if below
> > will just work.
> Ok.
> 
> > 
> >>    } else if (kvm_cpu_has_interrupt(vcpu)) {
> >>            if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> >>                    kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> >> @@ -5289,12 +5296,19 @@ static int vcpu_enter_guest(struct kvm_vcpu
> > *vcpu)
> >>    }
> >>  
> >>    if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >> +          /* update archtecture specific hints for APIC
> >> +           * virtual interrupt delivery */
> >> +          kvm_x86_ops->update_irq(vcpu);
> >> +
> >>            inject_pending_event(vcpu);
> >>  
> >>            /* enable NMI/IRQ window open exits if needed */
> >>            if (vcpu->arch.nmi_pending)
> >>                    kvm_x86_ops->enable_nmi_window(vcpu);
> >> -          else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> >> +          else if (kvm_apic_vid_enabled(vcpu)) {
> >> +                  if (kvm_cpu_has_extint(vcpu))
> >> +                          kvm_x86_ops->enable_irq_window(vcpu);
> > Same as above. With proper kvm_cpu_has_interrupt() implementation this
> > id is not needed.
> > 
> >> +          } else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> >>                    kvm_x86_ops->enable_irq_window(vcpu);
> >>  
> >>            if (kvm_lapic_enabled(vcpu)) {
> >> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >> index 166c450..898aa62 100644
> >> --- a/virt/kvm/ioapic.c
> >> +++ b/virt/kvm/ioapic.c
> >> @@ -186,6 +186,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, 
> >> int
> > irq)
> >>            /* need to read apic_id from apic regiest since                 
> >>  * it can be
> >>  rewritten */              irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
> >>  +         kvm_set_eoi_exitmap(ioapic->kvm->vcpus[0], irqe.vector, 1, 1);  
> >> }
> >>  #endif    return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
> >> --
> >> 1.7.1
> > 
> > --
> >                     Gleb.
> 
> 
> Best regards,
> Yang
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
                        Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to