Posted Interrupt allows vAPICV interrupts to inject into guest directly
without any vmexit.

- When delivering an interrupt to the guest, if the target vcpu is running,
  update the Posted-interrupt requests bitmap and send a notification event
  to the vcpu. Then the vcpu will handle this interrupt automatically,
  without any software involvement.

- If the target vcpu is not running, or there is already a notification
  event pending in the vcpu, do nothing. The interrupt will be handled
  the old way.

Signed-off-by: Yang Zhang <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |    3 +
 arch/x86/include/asm/vmx.h      |    4 +
 arch/x86/kernel/apic/io_apic.c  |  138 ++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.c            |   31 ++++++-
 arch/x86/kvm/lapic.h            |    8 ++
 arch/x86/kvm/vmx.c              |  192 +++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/x86.c              |    2 +
 include/linux/kvm_host.h        |    1 +
 virt/kvm/kvm_main.c             |    2 +
 9 files changed, 372 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e07a86..1145894 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -683,9 +683,12 @@ struct kvm_x86_ops {
        void (*enable_irq_window)(struct kvm_vcpu *vcpu);
        void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
        int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
+       int (*has_posted_interrupt)(struct kvm_vcpu *vcpu);
        void (*update_irq)(struct kvm_vcpu *vcpu);
        void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector,
                        int need_eoi, int global);
+       int (*send_nv)(struct kvm_vcpu *vcpu, int vector);
+       void (*pi_migrate)(struct kvm_vcpu *vcpu);
        int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
        int (*get_tdp_level)(void);
        u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 1003341..7b9e1d0 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -152,6 +152,7 @@
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
 #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
+#define PIN_BASED_POSTED_INTR                   0x00000080
 
 #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
@@ -174,6 +175,7 @@
 /* VMCS Encodings */
 enum vmcs_field {
        VIRTUAL_PROCESSOR_ID            = 0x00000000,
+       POSTED_INTR_NV                  = 0x00000002,
        GUEST_ES_SELECTOR               = 0x00000800,
        GUEST_CS_SELECTOR               = 0x00000802,
        GUEST_SS_SELECTOR               = 0x00000804,
@@ -208,6 +210,8 @@ enum vmcs_field {
        VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
        APIC_ACCESS_ADDR                = 0x00002014,
        APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+       POSTED_INTR_DESC_ADDR           = 0x00002016,
+       POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
        EPT_POINTER                     = 0x0000201a,
        EPT_POINTER_HIGH                = 0x0000201b,
        EOI_EXIT_BITMAP0                = 0x0000201c,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1817fa9..97cb8ee 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3277,6 +3277,144 @@ int arch_setup_dmar_msi(unsigned int irq)
 }
 #endif
 
+static int
+pi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+                     bool force)
+{
+       unsigned int dest;
+       struct irq_cfg *cfg = (struct irq_cfg *)data->chip_data;
+       if (cpumask_equal(cfg->domain, mask))
+               return IRQ_SET_MASK_OK;
+
+       if (__ioapic_set_affinity(data, mask, &dest))
+               return -1;
+
+       return IRQ_SET_MASK_OK;
+}
+
+static void pi_mask(struct irq_data *data)
+{
+       ;
+}
+
+static void pi_unmask(struct irq_data *data)
+{
+       ;
+}
+
+static struct irq_chip pi_chip = {
+       .name       = "POSTED-INTR",
+       .irq_ack    = ack_apic_edge,
+       .irq_unmask = pi_unmask,
+       .irq_mask   = pi_mask,
+       .irq_set_affinity   = pi_set_affinity,
+};
+
+int arch_pi_migrate(int irq, int cpu)
+{
+       struct irq_data *data = irq_get_irq_data(irq);
+       struct irq_cfg *cfg;
+       struct irq_desc *desc = irq_to_desc(irq);
+       unsigned long flags;
+
+       if (!desc)
+               return -EINVAL;
+
+       cfg = irq_cfg(irq);
+       if (cpumask_equal(cfg->domain, cpumask_of(cpu)))
+               return cfg->vector;
+
+       irq_set_affinity(irq, cpumask_of(cpu));
+       raw_spin_lock_irqsave(&desc->lock, flags);
+       irq_move_irq(data);
+       raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
+       return cfg->vector;
+}
+EXPORT_SYMBOL_GPL(arch_pi_migrate);
+
+static int arch_pi_create_irq(const struct cpumask *mask)
+{
+       int node = cpu_to_node(0);
+       unsigned int irq_want;
+       struct irq_cfg *cfg;
+       unsigned long flags;
+       unsigned int ret = 0;
+       int irq;
+
+       irq_want = nr_irqs_gsi;
+
+       irq = alloc_irq_from(irq_want, node);
+       if (irq < 0)
+               return 0;
+       cfg = alloc_irq_cfg(irq_want, node);
+       if (!cfg) {
+               free_irq_at(irq, NULL);
+               return 0;
+       }
+
+       raw_spin_lock_irqsave(&vector_lock, flags);
+       if (!__assign_irq_vector(irq, cfg, mask))
+               ret = irq;
+       raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+       if (ret) {
+               irq_set_chip_data(irq, cfg);
+               irq_clear_status_flags(irq, IRQ_NOREQUEST);
+       } else {
+               free_irq_at(irq, cfg);
+       }
+       return ret;
+}
+
+int arch_pi_alloc_irq(void *vmx)
+{
+       int irq, cpu = smp_processor_id();
+       struct irq_cfg *cfg;
+
+       irq = arch_pi_create_irq(cpumask_of(cpu));
+       if (!irq) {
+               pr_err("Posted Interrupt: no free irq\n");
+               return -EINVAL;
+       }
+       irq_set_handler_data(irq, vmx);
+       irq_set_chip_and_handler_name(irq, &pi_chip, handle_edge_irq, "edge");
+       irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+       irq_set_affinity(irq, cpumask_of(cpu));
+
+       cfg = irq_cfg(irq);
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
+
+       return irq;
+}
+EXPORT_SYMBOL_GPL(arch_pi_alloc_irq);
+
+void arch_pi_free_irq(unsigned int irq, void *vmx)
+{
+       if (irq) {
+               irq_set_handler_data(irq, NULL);
+               /* This will mask the irq */
+               free_irq(irq, vmx);
+               destroy_irq(irq);
+       }
+}
+EXPORT_SYMBOL_GPL(arch_pi_free_irq);
+
+int arch_pi_get_vector(unsigned int irq)
+{
+       struct irq_cfg *cfg;
+
+       if (!irq)
+               return -EINVAL;
+
+       cfg = irq_cfg(irq);
+       return cfg->vector;
+}
+EXPORT_SYMBOL_GPL(arch_pi_get_vector);
+
 #ifdef CONFIG_HPET_TIMER
 
 static int hpet_msi_set_affinity(struct irq_data *data,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index af48361..04220de 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -656,7 +656,7 @@ void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode)
 {
-       int result = 0;
+       int result = 0, send;
        struct kvm_vcpu *vcpu = apic->vcpu;
 
        switch (delivery_mode) {
@@ -674,6 +674,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
                } else {
                        apic_clear_vector(vector, apic->regs + APIC_TMR);
                        kvm_set_eoi_exitmap(vcpu, vector, 0, 0);
+                       if (kvm_apic_pi_enabled(vcpu)) {
+                               send = kvm_x86_ops->send_nv(vcpu, vector);
+                               if (send) {
+                                       result = 1;
+                                       break;
+                               }
+                       }
                }
 
                result = !apic_test_and_set_irr(vector, apic);
@@ -1541,6 +1548,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 
        if (kvm_x86_ops->has_virtual_interrupt_delivery(vcpu))
                apic->vid_enabled = true;
+
+       if (kvm_x86_ops->has_posted_interrupt(vcpu))
+               apic->pi_enabled = true;
+
        return 0;
 nomem_free_apic:
        kfree(apic);
@@ -1575,6 +1586,24 @@ int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_apic_get_highest_irr);
 
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned int *pir)
+{
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       unsigned int *reg;
+       unsigned int i;
+
+       if (!apic || !apic_enabled(apic))
+               return;
+
+       for (i = 0; i <= 7; i++) {
+               reg = apic->regs + APIC_IRR + i * 0x10;
+               *reg |= pir[i];
+               pir[i] = 0;
+       }
+       return;
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
        u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2503a64..ad35868 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -21,6 +21,7 @@ struct kvm_lapic {
        struct kvm_vcpu *vcpu;
        bool irr_pending;
        bool vid_enabled;
+       bool pi_enabled;
        /* Number of bits set in ISR. */
        s16 isr_count;
        /* The highest vector set in ISR; if -1 - invalid, must scan ISR. */
@@ -43,6 +44,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_extint(struct kvm_vcpu *v);
 int kvm_cpu_get_extint(struct kvm_vcpu *v);
 int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned int *pir);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
@@ -94,6 +96,12 @@ static inline bool kvm_apic_vid_enabled(struct kvm_vcpu 
*vcpu)
        return apic->vid_enabled;
 }
 
+static inline bool kvm_apic_pi_enabled(struct kvm_vcpu *vcpu)
+{
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       return apic->pi_enabled;
+}
+
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
 void kvm_lapic_init(void);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f6ef090..6448b96 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/slab.h>
 #include <linux/tboot.h>
+#include <linux/interrupt.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -89,6 +90,8 @@ module_param(enable_apicv_reg, bool, S_IRUGO);
 static bool __read_mostly enable_apicv_vid = 0;
 module_param(enable_apicv_vid, bool, S_IRUGO);
 
+static bool __read_mostly enable_apicv_pi = 0;
+module_param(enable_apicv_pi, bool, S_IRUGO);
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -372,6 +375,44 @@ struct nested_vmx {
        struct page *apic_access_page;
 };
 
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+       u32 pir[8];     /* Posted interrupt requested */
+       union {
+               struct {
+                       u8  on:1,
+                           rsvd:7;
+               } control;
+               u32 rsvd[8];
+       } u;
+} __aligned(64);
+
+#define POSTED_INTR_ON  0
+u8 pi_test_on(struct pi_desc *pi_desc)
+{
+       return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->u.control);
+}
+void pi_set_on(struct pi_desc *pi_desc)
+{
+       set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->u.control);
+}
+
+void pi_clear_on(struct pi_desc *pi_desc)
+{
+       clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->u.control);
+}
+
+u8 pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+       return test_and_set_bit(POSTED_INTR_ON,
+                       (unsigned long *)&pi_desc->u.control);
+}
+
+void pi_set_pir(int vector, struct pi_desc *pi_desc)
+{
+       set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
 struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        unsigned long         host_rsp;
@@ -439,6 +480,11 @@ struct vcpu_vmx {
        u64 eoi_exit_bitmap[4];
        u64 eoi_exit_bitmap_global[4];
 
+       /* Posted interrupt descriptor */
+       struct pi_desc *pi;
+       u32 irq;
+       u32 vector;
+
        /* Support for a guest hypervisor (nested VMX) */
        struct nested_vmx nested;
 };
@@ -698,6 +744,11 @@ static u64 host_efer;
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 
+int arch_pi_get_vector(unsigned int irq);
+int arch_pi_alloc_irq(struct vcpu_vmx *vmx);
+void arch_pi_free_irq(unsigned int irq, struct vcpu_vmx *vmx);
+int arch_pi_migrate(int irq, int cpu);
+
 /*
  * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
  * away by decrementing the array size.
@@ -783,6 +834,11 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_posted_intr(void)
+{
+       return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
        return cpu_has_vmx_tpr_shadow() &&
@@ -1555,6 +1611,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                struct desc_ptr *gdt = &__get_cpu_var(host_gdt);
                unsigned long sysenter_esp;
 
+               if (enable_apicv_pi && to_vmx(vcpu)->pi)
+                       pi_set_on(to_vmx(vcpu)->pi);
+
+               kvm_make_request(KVM_REQ_POSTED_INTR, vcpu);
+
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                local_irq_disable();
                list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
@@ -1582,6 +1643,8 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
                vcpu->cpu = -1;
                kvm_cpu_vmxoff();
        }
+       if (enable_apicv_pi && to_vmx(vcpu)->pi)
+               pi_set_on(to_vmx(vcpu)->pi);
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -2451,12 +2514,6 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf)
        u32 _vmexit_control = 0;
        u32 _vmentry_control = 0;
 
-       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-       opt = PIN_BASED_VIRTUAL_NMIS;
-       if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
-                               &_pin_based_exec_control) < 0)
-               return -EIO;
-
        min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
              CPU_BASED_CR8_LOAD_EXITING |
@@ -2531,6 +2588,17 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf)
                                &_vmexit_control) < 0)
                return -EIO;
 
+       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+       opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+       if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
+                               &_pin_based_exec_control) < 0)
+               return -EIO;
+
+       if (!(_cpu_based_2nd_exec_control &
+               SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
+               !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+               _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
+
        min = 0;
        opt = VM_ENTRY_LOAD_IA32_PAT;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
@@ -2715,6 +2783,9 @@ static __init int hardware_setup(void)
        if (!cpu_has_vmx_virtual_intr_delivery())
                enable_apicv_vid = 0;
 
+       if (!cpu_has_vmx_posted_intr() || !x2apic_enabled())
+               enable_apicv_pi = 0;
+
        if (nested)
                nested_vmx_setup_ctls_msrs();
 
@@ -3881,6 +3952,93 @@ static void ept_set_mmio_spte_mask(void)
        kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull);
 }
 
+irqreturn_t pi_handler(int irq, void *data)
+{
+       struct vcpu_vmx *vmx = data;
+
+       kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+       kvm_vcpu_kick(&vmx->vcpu);
+
+       return IRQ_HANDLED;
+}
+
+static int vmx_has_posted_interrupt(struct kvm_vcpu *vcpu)
+{
+       return irqchip_in_kernel(vcpu->kvm) && enable_apicv_pi;
+}
+
+static void vmx_pi_migrate(struct kvm_vcpu *vcpu)
+{
+       int ret = 0;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (!enable_apicv_pi)
+               return ;
+
+       preempt_disable();
+       local_irq_disable();
+       if (!vmx->irq) {
+               ret = arch_pi_alloc_irq(vmx);
+               if (ret < 0) {
+                       vmx->irq = -1;
+                       goto out;
+               }
+               vmx->irq = ret;
+
+               ret = request_irq(vmx->irq, pi_handler, IRQF_NO_THREAD,
+                                       "Posted Interrupt", vmx);
+               if (ret) {
+                       vmx->irq = -1;
+                       goto out;
+               }
+
+               ret = arch_pi_get_vector(vmx->irq);
+       } else
+               ret = arch_pi_migrate(vmx->irq, smp_processor_id());
+
+       if (ret < 0) {
+               vmx->irq = -1;
+               goto out;
+       } else {
+               vmx->vector = ret;
+               vmcs_write16(POSTED_INTR_NV, vmx->vector);
+               pi_clear_on(vmx->pi);
+       }
+out:
+       local_irq_enable();
+       preempt_enable();
+       return ;
+}
+
+static int vmx_send_nv(struct kvm_vcpu *vcpu,
+               int vector)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (unlikely(vmx->irq == -1))
+               return 0;
+
+       if (vcpu->cpu == smp_processor_id()) {
+               pi_set_on(vmx->pi);
+               return 0;
+       }
+
+       pi_set_pir(vector, vmx->pi);
+       if (!pi_test_and_set_on(vmx->pi) && (vcpu->mode == IN_GUEST_MODE)) {
+               apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), vmx->vector);
+               return 1;
+       }
+       return 0;
+}
+
+static void free_pi(struct vcpu_vmx *vmx)
+{
+       if (enable_apicv_pi) {
+               kfree(vmx->pi);
+               arch_pi_free_irq(vmx->irq, vmx);
+       }
+}
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -3890,6 +4048,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        unsigned long a;
 #endif
        int i;
+       u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 
        /* I/O */
        vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
@@ -3901,8 +4060,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
        /* Control */
-       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-               vmcs_config.pin_based_exec_ctrl);
+       if (!enable_apicv_pi)
+               pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_exec_ctrl);
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
@@ -3920,6 +4081,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                vmcs_write16(GUEST_INTR_STATUS, 0);
        }
 
+       if (enable_apicv_pi) {
+               vmx->pi = kmalloc(sizeof(struct pi_desc),
+                               GFP_KERNEL | __GFP_ZERO);
+               vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((vmx->pi)));
+       }
+
        if (ple_gap) {
                vmcs_write32(PLE_GAP, ple_gap);
                vmcs_write32(PLE_WINDOW, ple_window);
@@ -6161,6 +6328,11 @@ static void vmx_update_irq(struct kvm_vcpu *vcpu)
        if (!enable_apicv_vid)
                return ;
 
+       if (enable_apicv_pi) {
+               kvm_apic_update_irr(vcpu, (unsigned int *)vmx->pi->pir);
+               pi_clear_on(vmx->pi);
+       }
+
        vector = kvm_apic_get_highest_irr(vcpu);
        if (vector == -1)
                return;
@@ -6586,6 +6758,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
        free_vpid(vmx);
        free_nested(vmx);
+       free_pi(vmx);
        free_loaded_vmcs(vmx->loaded_vmcs);
        kfree(vmx->guest_msrs);
        kvm_vcpu_uninit(vcpu);
@@ -7483,8 +7656,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
        .has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
+       .has_posted_interrupt = vmx_has_posted_interrupt,
        .update_irq = vmx_update_irq,
        .set_eoi_exitmap = vmx_set_eoi_exitmap,
+       .send_nv = vmx_send_nv,
+       .pi_migrate = vmx_pi_migrate,
 
        .set_tss_addr = vmx_set_tss_addr,
        .get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b8de3b..f035267 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5250,6 +5250,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        bool req_immediate_exit = 0;
 
        if (vcpu->requests) {
+               if (kvm_check_request(KVM_REQ_POSTED_INTR, vcpu))
+                       kvm_x86_ops->pi_migrate(vcpu);
                if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
                        kvm_mmu_unload(vcpu);
                if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ecc5543..f8d8d34 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -107,6 +107,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_IMMEDIATE_EXIT    15
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
+#define KVM_REQ_POSTED_INTR       18
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID            0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be70035..05baf1c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1625,6 +1625,8 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
                        smp_send_reschedule(cpu);
        put_cpu();
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
+
 #endif /* !CONFIG_S390 */
 
 void kvm_resched(struct kvm_vcpu *vcpu)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to