[PATCH 00/13] Add VT-d Posted-Interrupts support for KVM

2014-11-09 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (13):
  iommu/vt-d: VT-d Posted-Interrupts feature detection
  KVM: Initialize VT-d Posted-Interrtups Descriptor
  KVM: Add KVM_CAP_PI to detect VT-d Posted-Interrtups
  iommu/vt-d: Adjust 'struct irte' to better suit for VT-d
Posted-Interrupts
  KVM: Update IRTE according to guest interrupt configuration changes
  KVM: Add some helper functions for Posted-Interrupts
  x86, irq: Define a global vector for VT-d Posted-Interrupts
  KVM: Update Posted-Interrupts descriptor during VCPU scheduling
  KVM: Change NDST field after VCPU scheduling
  KVM: Add the handler for Wake-up Vector
  KVM: Suppress posted-interrupt when 'SN' is set
  iommu/vt-d: No need to migrating irq for VT-d Posted-Interrtups
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 arch/x86/include/asm/entry_arch.h|2 +
 arch/x86/include/asm/hardirq.h   |1 +
 arch/x86/include/asm/hw_irq.h|2 +
 arch/x86/include/asm/irq_remapping.h |7 +
 arch/x86/include/asm/irq_vectors.h   |1 +
 arch/x86/include/asm/kvm_host.h  |9 ++
 arch/x86/kernel/apic/apic.c  |1 +
 arch/x86/kernel/entry_64.S   |2 +
 arch/x86/kernel/irq.c|   27 
 arch/x86/kernel/irqinit.c|2 +
 arch/x86/kvm/vmx.c   |  257 +-
 arch/x86/kvm/x86.c   |   53 ++-
 drivers/iommu/amd_iommu.c|6 +
 drivers/iommu/intel_irq_remapping.c  |   83 +--
 drivers/iommu/irq_remapping.c|   20 +++
 drivers/iommu/irq_remapping.h|8 +
 include/linux/dmar.h |   30 -
 include/linux/intel-iommu.h  |1 +
 include/linux/kvm_host.h |   25 
 include/uapi/linux/kvm.h |2 +
 virt/kvm/assigned-dev.c  |  141 +++
 virt/kvm/irq_comm.c  |4 +-
 virt/kvm/irqchip.c   |   11 --
 virt/kvm/kvm_main.c  |   14 ++
 24 files changed, 667 insertions(+), 42 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/13] KVM: Update Posted-Interrupts descriptor during VCPU scheduling

2014-11-09 Thread Feng Wu
Update Posted-Interrupts descriptor according to the
following rules:
- Before VCPU block, set 'NV' to POSTED_INTR_WAKEUP_VECTOR
- After VCPU block, set 'NV' back to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |5 ++
 arch/x86/kvm/vmx.c  |   83 +++
 arch/x86/kvm/x86.c  |   16 +++
 virt/kvm/kvm_main.c |   11 +
 4 files changed, 115 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0630161..71cfe3e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -773,6 +773,8 @@ struct kvm_x86_ops {
 
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+   int (*vcpu_pre_block)(struct kvm_vcpu *vcpu);
+   void (*vcpu_post_block)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1095,4 +1097,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, 
u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
+int kvm_arch_vcpu_pre_block(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_post_block(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f4f..4c1a966 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9153,6 +9153,86 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
 }
 
+static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old;
+   struct pi_desc new;
+
+   if (!irq_post_enabled)
+   return 0;
+
+   memset(&old, 0, sizeof(old));
+   memset(&new, 0, sizeof(new));
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   /*
+* A posted-interrupt happened in the one of the
+* following two cases:
+* 1. After the latest pir-to-virr sync operation
+* in kvm_arch_vcpu_runnable() function
+* 2. In this do-while() loop, a posted-interrupt
+* occurs.
+*
+* For either of above cases, we should not block
+* the VCPU.
+*/
+   if (pi_test_on(pi_desc) == 1) {
+   /*
+* Need to set this flag, then the inject will
+* be synced from PIR to vIRR before VM-ENTRY.
+* In fact, for guest IPI case, in function
+* vmx_deliver_posted_interrupt(), this flags
+* has already been set, but if the interrupt
+* is injected by VT-d PI hardware, we need
+* to set this.
+*/
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   return 1;
+   }
+
+   pi_clear_sn(&new);
+
+   /* set 'NV' to 'wakeup vector' */
+   new.nv = POSTED_INTR_WAKEUP_VECTOR;
+   } while (cmpxchg(&pi_desc->control, old.control, new.control)
+   != old.control);
+
+   return 0;
+}
+
+static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old;
+   struct pi_desc new;
+   unsigned int dest = 0;
+
+   if (!irq_post_enabled)
+   return;
+
+   pi_set_sn(pi_desc);
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   dest = cpu_physical_id(vcpu->cpu);
+
+   if (x2apic_mode)
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   } while (cmpxchg(&pi_desc->control, old.control, new.control)
+   != old.control);
+
+   pi_clear_sn(pi_desc);
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -9262,6 +9342,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.sched_in = vmx_sched_in,
 
.get_pi_desc_addr = vmx_get_pi_desc_addr,
+
+   .vcpu_pre_block = vmx_vcpu_pre_block,
+   .vcpu_post_block = vmx_vcpu_post_block,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c19d15..d0c8bb2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7746,6 +7746,22 @@ int kvm_update_pi_irte_common(struct kvm *kvm, struct 
kvm_vcpu *vcpu,
retu

[PATCH 06/13] KVM: Add some helper functions for Posted-Interrupts

2014-11-09 Thread Feng Wu
This patch adds three helper functions to manipulate the Posted-
Interrupts Descriptor.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   18 ++
 1 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ae91b72..f4f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -435,6 +435,24 @@ static void pi_clear_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
 }
 
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
 {
return test_and_set_bit(POSTED_INTR_ON,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/13] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2014-11-09 Thread Feng Wu
Enable VT-d Posted-Interrupts by default and add a command line
parameter ("nointpost") to disable it.

Signed-off-by: Feng Wu 
---
 drivers/iommu/irq_remapping.c |9 -
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 0e36860..3cb9429 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -23,7 +23,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 int irq_post_enabled = 0;
 EXPORT_SYMBOL_GPL(irq_post_enabled);
 
@@ -206,6 +206,13 @@ static __init int setup_irqremap(char *str)
 }
 early_param("intremap", setup_irqremap);
 
+static __init int setup_nointpost(char *str)
+{
+   disable_irq_post = 1;
+   return 0;
+}
+early_param("nointpost", setup_nointpost);
+
 void __init setup_irq_remapping_ops(void)
 {
remap_ops = &intel_irq_remap_ops;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/13] iommu/vt-d: No need to migrating irq for VT-d Posted-Interrtups

2014-11-09 Thread Feng Wu
We don't need to migrate the irqs for VT-d Posted-Interrupts here.
When 'pst' is set in the IRTE, the associated irq is posted to the
guest instead of being remapped. The destination of the
interrupt is set in the Posted-Interrupts Descriptor, and the migration
happens during VCPU scheduling.

Signed-off-by: Feng Wu 
---
 drivers/iommu/intel_irq_remapping.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index 87c02fe..249e2b1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1038,6 +1038,13 @@ intel_ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
if (get_irte(irq, &irte))
return -EBUSY;
 
+   /*
+* If the interrupt is for posting, it is used by guests,
+* we cannot change IRTE here.
+*/
+   if (irte.irq_post_low.pst == 1)
+   return 0;
+
err = assign_irq_vector(irq, cfg, mask);
if (err)
return err;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/13] x86, irq: Define a global vector for VT-d Posted-Interrupts

2014-11-09 Thread Feng Wu
Currently, we use a global vector as the Posted-Interrupts
Notification Event for all the VCPUs in the system. We need
to introduce another global vector for VT-d Posted-Interrupts,
which will be used to wake up a sleeping VCPU when an external
interrupt from a direct-assigned device arrives for that VCPU.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/entry_arch.h  |2 ++
 arch/x86/include/asm/hardirq.h |1 +
 arch/x86/include/asm/hw_irq.h  |2 ++
 arch/x86/include/asm/irq_vectors.h |1 +
 arch/x86/kernel/entry_64.S |2 ++
 arch/x86/kernel/irq.c  |   27 +++
 arch/x86/kernel/irqinit.c  |2 ++
 7 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/entry_arch.h 
b/arch/x86/include/asm/entry_arch.h
index dc5fa66..27ca0af 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 #ifdef CONFIG_HAVE_KVM
 BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
 smp_kvm_posted_intr_ipi)
+BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
+smp_kvm_posted_intr_wakeup_ipi)
 #endif
 
 /*
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b..9866065 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,6 +14,7 @@ typedef struct {
 #endif
 #ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
+   unsigned int kvm_posted_intr_wakeup_ipis;
 #endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906..559563c 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,7 @@
 extern asmlinkage void apic_timer_interrupt(void);
 extern asmlinkage void x86_platform_ipi(void);
 extern asmlinkage void kvm_posted_intr_ipi(void);
+extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
 extern asmlinkage void error_interrupt(void);
 extern asmlinkage void irq_work_interrupt(void);
 
@@ -92,6 +93,7 @@ extern void trace_call_function_single_interrupt(void);
 #define trace_irq_move_cleanup_interrupt  irq_move_cleanup_interrupt
 #define trace_reboot_interrupt  reboot_interrupt
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
 #endif /* CONFIG_TRACING */
 
 /* IOAPIC */
diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index 5702d7e..1343349 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -105,6 +105,7 @@
 /* Vector for KVM to deliver posted interrupt IPI */
 #ifdef CONFIG_HAVE_KVM
 #define POSTED_INTR_VECTOR 0xf2
+#define POSTED_INTR_WAKEUP_VECTOR  0xf1
 #endif
 
 /*
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb..7663aaa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1004,6 +1004,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 #ifdef CONFIG_HAVE_KVM
 apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
+   kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
 #endif
 
 #ifdef CONFIG_X86_MCE_THRESHOLD
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 922d285..47408c3 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -237,6 +237,9 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_HAVE_KVM
+void (*wakeup_handler_callback)(void) = NULL;
+EXPORT_SYMBOL_GPL(wakeup_handler_callback);
+
 /*
  * Handler for POSTED_INTERRUPT_VECTOR.
  */
@@ -256,6 +259,30 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs 
*regs)
 
set_irq_regs(old_regs);
 }
+
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
+{
+   struct pt_regs *old_regs = set_irq_regs(regs);
+
+   ack_APIC_irq();
+
+   irq_enter();
+
+   exit_idle();
+
+   inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+
+   if (wakeup_handler_callback)
+   wakeup_handler_callback();
+
+   irq_exit();
+
+   set_irq_regs(old_regs);
+}
+
 #endif
 
 __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee..659cde3 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -168,6 +168,8 @@ static void __init apic_intr_init(void)
 #ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+   /* IPI for KVM to deliver interrupt to wake up tasks */
+   alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR

[PATCH 11/13] KVM: Suppress posted-interrupt when 'SN' is set

2014-11-09 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot send a
posted-interrupt when 'SN' is set.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   11 +--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51d2c8a..495cfbd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4306,15 +4306,22 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
 static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
-   int r;
+   int r, sn;
 
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
return;
 
+   /*
+* Currently, we don't support urgent interrupt, all interrupts
+* are recognized as non-urgent interrupt, so we cannot send
+* posted-interrupt when 'SN' is set.
+*/
+   sn = pi_test_sn(&vmx->pi_desc);
+
r = pi_test_and_set_on(&vmx->pi_desc);
kvm_make_request(KVM_REQ_EVENT, vcpu);
 #ifdef CONFIG_SMP
-   if (!r && (vcpu->mode == IN_GUEST_MODE))
+   if (!r && !sn && (vcpu->mode == IN_GUEST_MODE))
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
else
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/13] KVM: Add the handler for Wake-up Vector

2014-11-09 Thread Feng Wu
When a VCPU is blocked and an external interrupt from an assigned
device is delivered to it, the VT-d Posted-Interrupts mechanism
will deliver an interrupt to the associated physical CPU with the
Wake-up Vector. In its handler, we find the destination VCPU
and wake it up.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |2 +
 arch/x86/kvm/vmx.c  |   52 +++
 arch/x86/kvm/x86.c  |   22 +++-
 include/linux/kvm_host.h|3 ++
 virt/kvm/kvm_main.c |3 ++
 5 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 71cfe3e..ca231a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,6 +99,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, 
int level)
 
 #define ASYNC_PF_PER_VCPU 64
 
+extern void (*wakeup_handler_callback)(void);
+
 enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fa77714..51d2c8a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -822,6 +822,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked-list of VCPUs, so in wakeup_handler() we
+ * can find which VCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2813,6 +2820,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -9183,6 +9192,7 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct pi_desc old;
struct pi_desc new;
+   unsigned long flags;
 
if (!irq_post_enabled)
return 0;
@@ -9222,9 +9232,22 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
 
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
+
+   /*
+* We should save physical cpu id here, vcpu->cpu may
+* be changed due to preemption, in that case, this
+* do-while loop will run again.
+*/
+   vcpu->wakeup_cpu = vcpu->cpu;
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
 
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
+   list_add_tail(&vcpu->blocked_vcpu_list,
+   &per_cpu(blocked_vcpu_on_cpu, vcpu->wakeup_cpu));
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
return 0;
 }
 
@@ -9234,6 +9257,7 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
struct pi_desc old;
struct pi_desc new;
unsigned int dest = 0;
+   unsigned long flags;
 
if (!irq_post_enabled)
return;
@@ -9255,6 +9279,13 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
 
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
+   list_del(&vcpu->blocked_vcpu_list);
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
+   vcpu->wakeup_cpu = -1;
+
pi_clear_sn(pi_desc);
 }
 
@@ -9372,6 +9403,25 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_post_block = vmx_vcpu_post_block,
 };
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static int __init vmx_init(void)
 {
int r, i, msr;
@@ -9486,6 +9536,8 @@ static

[PATCH] Add VT-d Posted-Interrupts support in QEMU

2014-11-09 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch is used together with the patches in KVM side.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

Feng Wu (1):
  x86: Update VT-d Posted-Interrupts related information

 hw/i386/kvm/pci-assign.c  |   24 
 linux-headers/linux/kvm.h |2 ++
 target-i386/kvm.c |5 +
 target-i386/kvm_i386.h|1 +
 4 files changed, 32 insertions(+), 0 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/13] KVM: Update IRTE according to guest interrupt configuration changes

2014-11-09 Thread Feng Wu
When guest changes its interrupt configuration (such as, vector, etc.)
for direct-assigned devices, we need to update the associated IRTE
with the new guest vector, so external interrupts from the assigned
devices can be injected to guests without VM-Exit.

The current method of handling guest lowest priority interrupts
is to use a counter 'apic_arb_prio' for each VCPU: we choose the
VCPU with the smallest 'apic_arb_prio' and then increase it by 1.
However, for VT-d PI, we cannot re-use this, since we no longer
have control over 'apic_arb_prio' when posted interrupts are
delivered directly by hardware.

Here, we introduce a way similar to 'apic_arb_prio' to handle
guest lowest priority interrupts when VT-d PI is used. The
idea is:
- Each VCPU has a counter 'round_robin_counter'.
- When a guest sets an interrupt to lowest priority, we choose
the VCPU with the smallest 'round_robin_counter' as the destination,
then increase it.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/irq_remapping.h |6 ++
 arch/x86/include/asm/kvm_host.h  |2 +
 arch/x86/kvm/vmx.c   |   12 +++
 arch/x86/kvm/x86.c   |   11 +++
 drivers/iommu/amd_iommu.c|6 ++
 drivers/iommu/intel_irq_remapping.c  |   28 +++
 drivers/iommu/irq_remapping.c|9 ++
 drivers/iommu/irq_remapping.h|3 +
 include/linux/dmar.h |   26 ++
 include/linux/kvm_host.h |   22 +
 include/uapi/linux/kvm.h |1 +
 virt/kvm/assigned-dev.c  |  141 ++
 virt/kvm/irq_comm.c  |4 +-
 virt/kvm/irqchip.c   |   11 ---
 14 files changed, 269 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index a3cc437..32d6cc4 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -51,6 +51,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev,
 unsigned int irq, unsigned int dest,
 struct msi_msg *msg, u8 hpet_id);
 extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+extern int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 vector);
 extern void panic_if_irq_remap(const char *msg);
 extern bool setup_remapped_irq(int irq,
   struct irq_cfg *cfg,
@@ -88,6 +89,11 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, 
unsigned int id)
return -ENODEV;
 }
 
+static inline int update_pi_irte(unsigned int irq, u64 pi_desc_addr, u32 
vector)
+{
+   return -ENODEV;
+}
+
 static inline void panic_if_irq_remap(const char *msg)
 {
 }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..0630161 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
struct kvm_lapic *apic;/* kernel irqchip context */
unsigned long apic_attention;
int32_t apic_arb_prio;
+   int32_t round_robin_counter;
int mp_state;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
@@ -771,6 +772,7 @@ struct kvm_x86_ops {
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+   u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4670d3..ae91b72 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -544,6 +544,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
@@ -4280,6 +4285,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
*vcpu)
return;
 }
 
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+   return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -9232,6 +9242,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_nested_events = vmx_check_nested_events,
 
.sched_in = vmx_sched_in,
+
+   .get_pi_desc_addr = vmx_get_pi_desc_addr,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b447a98..0c19d15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7735,6 +7735,17 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_no

[PATCH 09/13] KVM: Change NDST field after VCPU scheduling

2014-11-09 Thread Feng Wu
This patch changes the NDST field of the Posted-Interrupts
Descriptor after the VCPU is scheduled to another physical
CPU.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   25 +
 1 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4c1a966..fa77714 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1906,6 +1906,31 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+   if (irq_post_enabled && (vcpu->cpu != cpu)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   memset(&old, 0, sizeof(old));
+   memset(&new, 0, sizeof(new));
+
+   pi_set_sn(pi_desc);
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_mode)
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+
+   } while (cmpxchg(&pi_desc->control, old.control,
+   new.control) != old.control);
+   pi_clear_sn(pi_desc);
+   }
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] x86: Update VT-d Posted-Interrupts related information

2014-11-09 Thread Feng Wu
VT-d Posted-Interrupts(PI) is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
involvement when guest is running in non-root mode.

If VT-d PI is supported by KVM, we need to update the IRTE with
the new guest interrupt configuration.

Signed-off-by: Feng Wu 
---
 hw/i386/kvm/pci-assign.c  |   24 
 linux-headers/linux/kvm.h |2 ++
 target-i386/kvm.c |5 +
 target-i386/kvm_i386.h|1 +
 4 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index bb206da..e55a99b 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1005,6 +1005,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
 assigned_dev->intx_route.mode = PCI_INTX_DISABLED;
 assigned_dev->intx_route.irq = -1;
 assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSI;
+
+if (kvm_check_extension(kvm_state, KVM_CAP_PI)) {
+if (kvm_device_pi_update(kvm_state, assigned_dev->dev_id) < 0) {
+perror("assigned_dev_update_msi: kvm_device_pi_update");
+}
+}
 } else {
 Error *local_err = NULL;
 
@@ -1029,6 +1035,12 @@ static void assigned_dev_update_msi_msg(PCIDevice 
*pci_dev)
 
 kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0],
  msi_get_message(pci_dev, 0));
+
+if (kvm_check_extension(kvm_state, KVM_CAP_PI)) {
+if (kvm_device_pi_update(kvm_state, assigned_dev->dev_id) < 0) {
+perror("assigned_dev_update_msi_msg: kvm_device_pi_update");
+}
+}
 }
 
 static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
@@ -1149,6 +1161,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
 perror("assigned_dev_enable_msix: assign irq");
 return;
 }
+
+if (kvm_check_extension(kvm_state, KVM_CAP_PI)) {
+if (kvm_device_pi_update(kvm_state, assigned_dev->dev_id) < 0) 
{
+perror("assigned_dev_update_msix: kvm_device_pi_update");
+}
+}
 }
 assigned_dev->intx_route.mode = PCI_INTX_DISABLED;
 assigned_dev->intx_route.irq = -1;
@@ -1618,6 +1636,12 @@ static void assigned_dev_msix_mmio_write(void *opaque, 
hwaddr addr,
 if (ret) {
 error_report("Error updating irq routing entry (%d)", ret);
 }
+if (kvm_check_extension(kvm_state, KVM_CAP_PI)) {
+if (kvm_device_pi_update(kvm_state, adev->dev_id) < 0) {
+perror("assigned_dev_update_msi_msg: "
+"kvm_device_pi_update");
+}
+}
 }
 }
 }
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 2669938..b34f3c4 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -765,6 +765,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_PI 106
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1020,6 +1021,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_XEN_HVM_CONFIG_IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+#define KVM_ASSIGN_DEV_PI_UPDATE  _IOR(KVMIO,  0x7d, __u32)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2  _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2  _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ccf36e8..3dd8e5e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -2660,6 +2660,11 @@ int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, 
int virq)
   KVM_DEV_IRQ_GUEST_MSI, virq);
 }
 
+int kvm_device_pi_update(KVMState *s, uint32_t dev_id)
+{
+return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_PI_UPDATE, &dev_id);
+}
+
 int kvm_device_msi_deassign(KVMState *s, uint32_t dev_id)
 {
 return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSI |
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index cac30fd..c119b3e 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -37,4 +37,5 @@ int kvm_device_msix_set_vector(KVMState *s, uint32_t dev_id, 
uint32_t vector,
 int kvm_device_msix_assign(KVMState *s, uint32_t dev_id);
 int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id);
 
+int kvm_device_pi_update(KVMState *s, uint32_t dev_id);
 #endif
-- 
1.7.1

--
To un

[PATCH 02/13] KVM: Initialize VT-d Posted-Interrtups Descriptor

2014-11-09 Thread Feng Wu
This patch initializes the VT-d Posted-Interrupt Descriptor.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/irq_remapping.h |1 +
 arch/x86/kernel/apic/apic.c  |1 +
 arch/x86/kvm/vmx.c   |   56 -
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index b7747c4..a3cc437 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -57,6 +57,7 @@ extern bool setup_remapped_irq(int irq,
   struct irq_chip *chip);
 
 void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+extern int irq_post_enabled;
 
 #else  /* CONFIG_IRQ_REMAP */
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc04..987408d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -162,6 +162,7 @@ __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
 int x2apic_mode;
+EXPORT_SYMBOL_GPL(x2apic_mode);
 #ifdef CONFIG_X86_X2APIC
 /* x2apic enabled before OS handover */
 int x2apic_preenabled;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..a4670d3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 
@@ -408,13 +409,32 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   u64 on  : 1,
+   sn  : 1,
+   rsvd_1  : 13,
+   ndm : 1,
+   nv  : 8,
+   rsvd_2  : 8,
+   ndst: 32;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
 {
return test_and_set_bit(POSTED_INTR_ON,
@@ -4396,6 +4416,33 @@ static void ept_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+static bool pi_desc_init(struct vcpu_vmx *vmx)
+{
+   unsigned int dest;
+
+   if (irq_post_enabled == 0)
+   return true;
+
+   /*
+* Initialize Posted-Interrupt Descriptor
+*/
+
+   pi_clear_sn(&vmx->pi_desc);
+   vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+
+   /* Physical mode for Notification Event */
+   vmx->pi_desc.ndm = 0;
+   dest = cpu_physical_id(vmx->vcpu.cpu);
+
+   if (x2apic_mode)
+   vmx->pi_desc.ndst = dest;
+   else
+   vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
+
+   return true;
+}
+
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4439,6 +4486,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
+
+   if (!pi_desc_init(vmx)) {
+   printk(KERN_ERR "Initialize PI descriptor error!\n");
+   return 1;
+   }
}
 
if (ple_gap) {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/13] iommu/vt-d: VT-d Posted-Interrupts feature detection

2014-11-09 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

This patch adds feature detection logic for VT-d posted-interrupt.

Signed-off-by: Feng Wu 
---
 drivers/iommu/intel_irq_remapping.c |   13 +
 drivers/iommu/irq_remapping.c   |4 
 drivers/iommu/irq_remapping.h   |5 +
 include/linux/intel-iommu.h |1 +
 4 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index 7c80661..f99f0f1 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -580,6 +580,19 @@ static int __init intel_irq_remapping_supported(void)
if (!ecap_ir_support(iommu->ecap))
return 0;
 
+   /* VT-d posted-interrupt feature detection*/
+   if (disable_irq_post == 0)
+   for_each_drhd_unit(drhd) {
+   struct intel_iommu *iommu = drhd->iommu;
+
+   if (!cap_pi_support(iommu->cap)) {
+   irq_post_enabled = 0;
+   disable_irq_post = 1;
+   break;
+   }
+   irq_post_enabled = 1;
+   }
+
return 1;
 }
 
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 74a1767..2f8ee00 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -23,6 +23,10 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
+int disable_irq_post = 1;
+int irq_post_enabled = 0;
+EXPORT_SYMBOL_GPL(irq_post_enabled);
+
 static struct irq_remap_ops *remap_ops;
 
 static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index fde250f..7bb5913 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -37,6 +37,9 @@ extern int disable_sourceid_checking;
 extern int no_x2apic_optout;
 extern int irq_remapping_enabled;
 
+extern int disable_irq_post;
+extern int irq_post_enabled;
+
 struct irq_remap_ops {
/* Check whether Interrupt Remapping is supported */
int (*supported)(void);
@@ -91,6 +94,8 @@ extern struct irq_remap_ops amd_iommu_irq_ops;
 #define irq_remapping_enabled 0
 #define disable_irq_remap 1
 #define irq_remap_broken  0
+#define disable_irq_post  1
+#define irq_post_enabled  0
 
 #endif /* CONFIG_IRQ_REMAP */
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a65208a..5b1a124 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 /*
  * Decoding Capability Register
  */
+#define cap_pi_support(c)  (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/13] KVM: Add KVM_CAP_PI to detect VT-d Posted-Interrtups

2014-11-09 Thread Feng Wu
This patch adds KVM_CAP_PI so that QEMU can detect the VT-d
Posted-Interrupts feature.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/x86.c   |4 
 include/uapi/linux/kvm.h |1 +
 2 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..b447a98 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -2775,6 +2776,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
ext)
case KVM_CAP_TSC_DEADLINE_TIMER:
r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
break;
+   case KVM_CAP_PI:
+   r = irq_post_enabled;
+   break;
default:
r = 0;
break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..7593c52 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -761,6 +761,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_PI 106
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/13] iommu/vt-d: Adjust 'struct irte' to better suit for VT-d Posted-Interrupts

2014-11-09 Thread Feng Wu
This patch adjusts the definition of 'struct irte', so that we can
add the VT-d Posted-Interrupts format to this structure later.

Signed-off-by: Feng Wu 
---
 drivers/iommu/intel_irq_remapping.c |   35 +++
 include/linux/dmar.h|4 ++--
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index f99f0f1..776da10 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -310,9 +310,9 @@ static void set_irte_sid(struct irte *irte, unsigned int 
svt,
 {
if (disable_sourceid_checking)
svt = SVT_NO_VERIFY;
-   irte->svt = svt;
-   irte->sq = sq;
-   irte->sid = sid;
+   irte->irq_remap_high.svt = svt;
+   irte->irq_remap_high.sq = sq;
+   irte->irq_remap_high.sid = sid;
 }
 
 static int set_ioapic_sid(struct irte *irte, int apic)
@@ -917,8 +917,8 @@ static void prepare_irte(struct irte *irte, int vector,
 {
memset(irte, 0, sizeof(*irte));
 
-   irte->present = 1;
-   irte->dst_mode = apic->irq_dest_mode;
+   irte->irq_remap_low.present = 1;
+   irte->irq_remap_low.dst_mode = apic->irq_dest_mode;
/*
 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
 * actual level or edge trigger will be setup in the IO-APIC
@@ -926,11 +926,11 @@ static void prepare_irte(struct irte *irte, int vector,
 * For more details, see the comments (in io_apic.c) explainig IO-APIC
 * irq migration in the presence of interrupt-remapping.
*/
-   irte->trigger_mode = 0;
-   irte->dlvry_mode = apic->irq_delivery_mode;
-   irte->vector = vector;
-   irte->dest_id = IRTE_DEST(dest);
-   irte->redir_hint = 1;
+   irte->irq_remap_low.trigger_mode = 0;
+   irte->irq_remap_low.dlvry_mode = apic->irq_delivery_mode;
+   irte->irq_remap_low.vector = vector;
+   irte->irq_remap_low.dest_id = IRTE_DEST(dest);
+   irte->irq_remap_low.redir_hint = 1;
 }
 
 static int intel_setup_ioapic_entry(int irq,
@@ -973,10 +973,13 @@ static int intel_setup_ioapic_entry(int irq,
"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
"Avail:%X Vector:%02X Dest:%08X "
"SID:%04X SQ:%X SVT:%X)\n",
-   attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
-   irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
-   irte.avail, irte.vector, irte.dest_id,
-   irte.sid, irte.sq, irte.svt);
+   attr->ioapic, irte.irq_remap_low.present,
+   irte.irq_remap_low.fpd, irte.irq_remap_low.dst_mode,
+   irte.irq_remap_low.redir_hint, irte.irq_remap_low.trigger_mode,
+   irte.irq_remap_low.dlvry_mode, irte.irq_remap_low.avail,
+   irte.irq_remap_low.vector, irte.irq_remap_low.dest_id,
+   irte.irq_remap_high.sid, irte.irq_remap_high.sq,
+   irte.irq_remap_high.svt);
 
entry = (struct IR_IO_APIC_route_entry *)route_entry;
memset(entry, 0, sizeof(*entry));
@@ -1046,8 +1049,8 @@ intel_ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
return err;
}
 
-   irte.vector = cfg->vector;
-   irte.dest_id = IRTE_DEST(dest);
+   irte.irq_remap_low.vector = cfg->vector;
+   irte.irq_remap_low.dest_id = IRTE_DEST(dest);
 
/*
 * Atomically updates the IRTE with the new destination, vector
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 593fff9..8be5d42 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -159,7 +159,7 @@ struct irte {
vector  : 8,
__reserved_2: 8,
dest_id : 32;
-   };
+   } irq_remap_low;
__u64 low;
};
 
@@ -169,7 +169,7 @@ struct irte {
sq  : 2,
svt : 2,
__reserved_3: 44;
-   };
+   } irq_remap_high;
__u64 high;
};
 };
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v1 0/2] Define some VFIO interfaces for VT-d Posted-Interrupts

2014-11-20 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts specification at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This patch set does the following things:

- Define a new VFIO group, KVM_DEV_VFIO_INTERRUPT, and its attribute
KVM_DEV_VFIO_INTERRUPT_POSTING_IRQ. QEMU can use this interface to
configure VT-d PI when the guest updates the interrupt configuration
(MSI/MSI-X configuration).

- Define a new VFIO API: vfio_msi_get_irq(), which can be used by KVM
to get the host irq of the assigned devices. Then KVM can update the
associated IRTE for VT-d PI.

Feng Wu (2):
  vfio: Add new interrupt group for VFIO
  vfio: Add VFIO API vfio_msi_get_irq

 Documentation/virtual/kvm/devices/vfio.txt |8 
 drivers/vfio/pci/vfio_pci.c|   10 ++
 include/linux/vfio.h   |2 ++
 include/uapi/linux/kvm.h   |   14 ++
 4 files changed, 34 insertions(+), 0 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v1 2/2] vfio: Add VFIO API vfio_msi_get_irq

2014-11-20 Thread Feng Wu
This API returns the host irq for the MSI/MSI-X interrupts.

Signed-off-by: Feng Wu 
---
 drivers/vfio/pci/vfio_pci.c |   10 ++
 include/linux/vfio.h|2 ++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 9558da3..4fb9828 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1009,6 +1009,16 @@ put_devs:
kfree(devs.devices);
 }
 
+unsigned int vfio_msi_get_irq(struct vfio_device *device, int vector, bool 
msix)
+{
+   struct vfio_pci_device *vdev =
+   (struct vfio_pci_device *)vfio_device_data(device);
+   struct pci_dev *pdev = vdev->pdev;
+
+   return msix ? vdev->msix[vector].vector : pdev->irq + vector;
+}
+EXPORT_SYMBOL_GPL(vfio_msi_get_irq);
+
 static void __exit vfio_pci_cleanup(void)
 {
pci_unregister_driver(&vfio_pci_driver);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index d320411..007ca55 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -92,6 +92,8 @@ extern void vfio_unregister_iommu_driver(
 /*
  * External user API
  */
+extern unsigned int vfio_msi_get_irq(struct vfio_device *device, int vector,
+ bool msix);
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v1 1/2] vfio: Add new interrupt group for VFIO

2014-11-20 Thread Feng Wu
Add a new group, KVM_DEV_VFIO_INTERRUPT, and the attribute
KVM_DEV_VFIO_INTERRUPT_POSTING_IRQ that belongs to it.

This is used for VT-d Posted-Interrupts setup.

Signed-off-by: Feng Wu 
---
 Documentation/virtual/kvm/devices/vfio.txt |8 
 include/uapi/linux/kvm.h   |   14 ++
 2 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/vfio.txt 
b/Documentation/virtual/kvm/devices/vfio.txt
index ef51740..bd99176 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -13,6 +13,7 @@ VFIO-group is held by KVM.
 
 Groups:
   KVM_DEV_VFIO_GROUP
+  KVM_DEV_VFIO_INTERRUPT
 
 KVM_DEV_VFIO_GROUP attributes:
   KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
@@ -20,3 +21,10 @@ KVM_DEV_VFIO_GROUP attributes:
 
 For each, kvm_device_attr.addr points to an int32_t file descriptor
 for the VFIO group.
+
+KVM_DEV_VFIO_INTERRUPT attributes:
+  KVM_DEV_VFIO_INTERRUPT_POSTING_IRQ: Set up the interrupt configuration for
+VT-d Posted-Interrupts
+
+For each, kvm_device_attr.addr points to a struct kvm_posted_intr, which
+includes the information needed for VT-d Posted-Interrupts setup.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..5544fcc 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -943,9 +943,23 @@ struct kvm_device_attr {
__u64   addr;   /* userspace address of attr data */
 };
 
+struct virq_info {
+   __u32   index;  /* index of the msi/msix entry */
+   int virq;   /* virq of the interrupt */
+};
+
+struct kvm_posted_intr {
+   __u32   fd; /* file descriptor of the VFIO device */
+   __u32   count;
+   boolmsix;
+   struct virq_info virq_info[0];
+};
+
 #define  KVM_DEV_VFIO_GROUP1
 #define   KVM_DEV_VFIO_GROUP_ADD   1
 #define   KVM_DEV_VFIO_GROUP_DEL   2
+#define  KVM_DEV_VFIO_INTERRUPT2
+#define   KVM_DEV_VFIO_INTERRUPT_POSTING_IRQ   1
 
 enum kvm_device_type {
KVM_DEV_TYPE_FSL_MPIC_20= 1,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v2 1/2] KVM: kvm-vfio: User API for VT-d Posted-Interrupts

2014-11-25 Thread Feng Wu
This patch adds and documents a new attribute
KVM_DEV_VFIO_DEVICE_POSTING_IRQ in KVM_DEV_VFIO_DEVICE group.
This new attribute is used for VT-d Posted-Interrupts.

When the guest OS changes the interrupt configuration of an
assigned device, such as the MSI/MSI-X data/address fields,
QEMU will use this IRQ attribute to tell KVM to update the
related IRTE according to the VT-d Posted-Interrupts Specification;
for example, the guest vector must be updated in the related IRTE.

Signed-off-by: Feng Wu 
---
 Documentation/virtual/kvm/devices/vfio.txt |9 +
 include/uapi/linux/kvm.h   |   10 ++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/vfio.txt 
b/Documentation/virtual/kvm/devices/vfio.txt
index f7aff29..39dee86 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -42,3 +42,12 @@ activated before VFIO_DEVICE_SET_IRQS has been called to 
trigger the IRQ
 or associate an eventfd to it. Unforwarding can only be called while the
 signaling has been disabled with VFIO_DEVICE_SET_IRQS. If this condition is
 not satisfied, the command returns an -EBUSY.
+
+  KVM_DEV_VFIO_DEVICE_POSTING_IRQ: Use the posted-interrupts mechanism to post
+   the IRQ to guests.
+For this attribute, kvm_device_attr.addr points to a kvm_posted_intr struct.
+
+When the guest OS changes the interrupt configuration of an assigned device,
+such as the MSI/MSI-X data/address fields, QEMU will use this IRQ attribute
+to tell KVM to update the related IRTE according to the VT-d Posted-Interrupts
+Specification; for example, the guest vector must be updated in the related IRTE.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a269a42..e5f86ad 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_DEVICE   2
 #define   KVM_DEV_VFIO_DEVICE_FORWARD_IRQ  1
 #define   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ2
+#define   KVM_DEV_VFIO_DEVICE_POSTING_IRQ  3
 
 enum kvm_device_type {
KVM_DEV_TYPE_FSL_MPIC_20= 1,
@@ -973,6 +974,15 @@ struct kvm_arch_forwarded_irq {
__u32 gsi; /* gsi, ie. virtual IRQ number */
 };
 
+struct kvm_posted_intr {
+   __u32   argsz;
+   __u32   fd; /* file descriptor of the VFIO device */
+   __u32   index;  /* VFIO device IRQ index */
+   __u32   start;
+   __u32   count;
+   int virq[0];/* gsi, ie. virtual IRQ number */
+};
+
 /*
  * ioctls for VM fds
  */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v2 0/2] kvm-vfio: implement the vfio skeleton for VT-d Posted-Interrupts

2014-11-25 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts specification at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This patchset adds the kvm-vfio interface for VT-d Posted-Interrupts.

In the second patch of this patchset, I leave the function
kvm_update_pi_irte() empty, since the purpose of this patch set is
to implement the VFIO-related parts of VT-d PI. kvm_update_pi_irte()
will do the real work, such as updating the IRTE. In fact, I think this
function will eventually be implemented in another file instead of vfio.c;
at the current stage I just stub it here so that the build succeeds. After
the other dependencies (such as the irq core changes in the Linux kernel) are
resolved, I will send out the rest of the VT-d PI patchset.

This patchset is based on the following Eric's VFIO patchset:
[PATCH v3 0/8] KVM-VFIO IRQ forward control

v1->v2
- Re-use KVM_DEV_VFIO_DEVICE group for VT-d PI.
- Define a new attribute in KVM_DEV_VFIO_DEVICE group.
- Teach KVM about struct pci_dev, and get the host irq from it.

Feng Wu (2):
  KVM: kvm-vfio: User API for VT-d Posted-Interrupts
  KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

 Documentation/virtual/kvm/devices/vfio.txt |9 ++
 include/uapi/linux/kvm.h   |   10 +++
 virt/kvm/vfio.c|  115 
 3 files changed, 134 insertions(+), 0 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v2 2/2] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-11-25 Thread Feng Wu
This patch adds the kvm-vfio interface for VT-d Posted-Interrupts.
When the guest updates the MSI/MSI-X information of an assigned device,
QEMU will use the KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to set up
the IRTE for VT-d PI. This patch implements this IRQ attribute.

Signed-off-by: Feng Wu 
---
 virt/kvm/vfio.c |  115 +++
 1 files changed, 115 insertions(+), 0 deletions(-)

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 6bc7001..435adf4 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -446,6 +446,115 @@ out:
return ret;
 }
 
+static int kvm_update_pi_irte(struct kvm *kvm, int host_irq, int guest_irq)
+{
+   /*
+* TODO: add the real code to update the related IRTE.
+* Basically, this function will do the following things:
+* - Get struct kvm_kernel_irq_routing_entry from the guest irq
+* - Get the destination vCPU of the interrupt
+* - Update the IRTE according to the VT-d PI Spec.
+*   1) guest vector
+*   2) Posted-Interrupts descriptor address
+*/
+
+   return 0;
+}
+
+static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
+{
+   if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
+   u8 pin;
+   pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+   if (pin)
+   return 1;
+   } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
+   u8 pos;
+   u16 flags;
+
+   pos = pdev->msi_cap;
+   if (pos) {
+   pci_read_config_word(pdev,
+pos + PCI_MSI_FLAGS, &flags);
+   return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
+   }
+   } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
+   u8 pos;
+   u16 flags;
+
+   pos = pdev->msix_cap;
+   if (pos) {
+   pci_read_config_word(pdev,
+pos + PCI_MSIX_FLAGS, &flags);
+
+   return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
+   }
+   } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+   if (pci_is_pcie(pdev))
+   return 1;
+
+   return 0;
+}
+
+static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
+{
+   struct kvm_posted_intr pi_info;
+   int *virq;
+   unsigned long minsz;
+   struct vfio_device *vdev;
+   struct msi_desc *entry;
+   struct device *dev;
+   struct pci_dev *pdev;
+   int i, max, ret;
+
+   minsz = offsetofend(struct kvm_posted_intr, count);
+
+   if (copy_from_user(&pi_info, (void __user *)argp, minsz))
+   return -EFAULT;
+
+   if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS)
+   return -EINVAL;
+
+   vdev = kvm_vfio_get_vfio_device(pi_info.fd);
+   if (IS_ERR(vdev))
+   return PTR_ERR(vdev);
+
+   dev = kvm_vfio_external_base_device(vdev);
+   if (!dev)
+   return -EFAULT;
+
+   pdev = to_pci_dev(dev);
+
+   max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
+
+   if (pi_info.argsz - minsz < pi_info.count * sizeof(int) ||
+   pi_info.start >= max || pi_info.start + pi_info.count > max)
+   return -EINVAL;
+
+   virq = memdup_user((void __user *)((unsigned long)argp + minsz),
+  pi_info.count * sizeof(int));
+   if (IS_ERR(virq))
+   return PTR_ERR(virq);
+
+   for (i=0; i<pi_info.count; i++) {
+   list_for_each_entry(entry, &pdev->msi_list, list) {
+   if (entry->msi_attrib.entry_nr != pi_info.start+i)
+   continue;
+
+   ret = kvm_update_pi_irte(kdev->kvm,
+entry->irq, virq[i]);
+   if (ret) {
+   kfree(virq);
+   return -EFAULT;
+   }
+   }
+   }
+
+   kfree(virq);
+
+   return 0;
+}
+
 static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
 {
int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
@@ -456,6 +565,9 @@ static int kvm_vfio_set_device(struct kvm_device *kdev, 
long attr, u64 arg)
case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
break;
+   case KVM_DEV_VFIO_DEVICE_POSTING_IRQ:
+   ret = kvm_vfio_set_pi(kdev, argp);
+   break;
default:
ret = -ENXIO;
}
@@ -511,6 +623,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
return 0;
 #endif
+   case KVM_DEV_VFIO_DEVICE_POSTING_IRQ:
+

[v2 16/25] KVM: make kvm_set_msi_irq() public

2014-12-02 Thread Feng Wu
Make kvm_set_msi_irq() public so that it can be used outside irq_comm.c.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_host.h |2 ++
 virt/kvm/irq_comm.c  |2 +-
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cfa85ac..5cd4420 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -785,6 +785,8 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index f3c5d69..231671a 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -106,7 +106,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
   struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 25/25] iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

2014-12-02 Thread Feng Wu
Enable VT-d Posted-Interrupts and add a command line
parameter for it.

Signed-off-by: Feng Wu 
---
 Documentation/kernel-parameters.txt |1 +
 drivers/iommu/irq_remapping.c   |   12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 838f377..324b790 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1453,6 +1453,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
nosid   disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
+   nopost  disable Interrupt Posting
 
iomem=  Disable strict checking of access to MMIO memory
strict  regions from userspace.
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index b008663..aa3cd23 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -24,7 +24,7 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
-int disable_irq_post = 1;
+int disable_irq_post = 0;
 
 static struct irq_remap_ops *remap_ops;
 
@@ -59,14 +59,18 @@ static __init int setup_irqremap(char *str)
return -EINVAL;
 
while (*str) {
-   if (!strncmp(str, "on", 2))
+   if (!strncmp(str, "on", 2)) {
disable_irq_remap = 0;
-   else if (!strncmp(str, "off", 3))
+   disable_irq_post = 0;
+   } else if (!strncmp(str, "off", 3)) {
disable_irq_remap = 1;
-   else if (!strncmp(str, "nosid", 5))
+   disable_irq_post = 1;
+   } else if (!strncmp(str, "nosid", 5))
disable_sourceid_checking = 1;
else if (!strncmp(str, "no_x2apic_optout", 16))
no_x2apic_optout = 1;
+   else if (!strncmp(str, "nopost", 6))
+   disable_irq_post = 1;
 
str += strcspn(str, ",");
while (*str == ',')
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 23/25] KVM: Add the handler for Wake-up Vector

2014-12-02 Thread Feng Wu
When a vCPU is blocked and an external interrupt from an assigned
device is delivered to it, the VT-d Posted-Interrupts mechanism
delivers an interrupt with the Wake-up Vector to the associated
physical CPU. In its handler, we find the destination vCPU
and wake it up.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |2 +
 arch/x86/kvm/vmx.c  |   52 +++
 arch/x86/kvm/x86.c  |   22 +++-
 include/linux/kvm_host.h|3 ++
 virt/kvm/kvm_main.c |3 ++
 5 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2fd85a5..76fc32d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -101,6 +101,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, 
int level)
 
 #define ASYNC_PF_PER_VCPU 64
 
+extern void (*wakeup_handler_callback)(void);
+
 enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e71bf3b..dc6fd84 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -822,6 +822,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked list of VCPUs, so that in wakeup_handler() we
+ * can find which VCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2813,6 +2820,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -9177,6 +9186,7 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct pi_desc old;
struct pi_desc new;
+   unsigned long flags;
 
if (!irq_remapping_cap(IRQ_POSTING_CAP))
return 0;
@@ -9216,9 +9226,22 @@ static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
 
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
+
+   /*
+* We should save physical cpu id here, vcpu->cpu may
+* be changed due to preemption, in that case, this
+* do-while loop will run again.
+*/
+   vcpu->wakeup_cpu = vcpu->cpu;
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
 
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
+   list_add_tail(&vcpu->blocked_vcpu_list,
+   &per_cpu(blocked_vcpu_on_cpu, vcpu->wakeup_cpu));
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
return 0;
 }
 
@@ -9228,6 +9251,7 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
struct pi_desc old;
struct pi_desc new;
unsigned int dest = 0;
+   unsigned long flags;
 
if (!irq_remapping_cap(IRQ_POSTING_CAP))
return;
@@ -9249,6 +9273,13 @@ static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
} while (cmpxchg(&pi_desc->control, old.control, new.control)
!= old.control);
 
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
+   list_del(&vcpu->blocked_vcpu_list);
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+   vcpu->wakeup_cpu), flags);
+   vcpu->wakeup_cpu = -1;
+
pi_clear_sn(pi_desc);
 }
 
@@ -9366,6 +9397,25 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_post_block = vmx_vcpu_post_block,
 };
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static int __init vmx_init(void)

[v2 24/25] KVM: Suppress posted-interrupt when 'SN' is set

2014-12-02 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot send a
posted-interrupt when 'SN' is set.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   11 +--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dc6fd84..6b2f3e7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4306,15 +4306,22 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
 static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
-   int r;
+   int r, sn;
 
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
return;
 
+   /*
+* Currently, we don't support urgent interrupt, all interrupts
+* are recognized as non-urgent interrupt, so we cannot send
+* posted-interrupt when 'SN' is set.
+*/
+   sn = pi_test_sn(&vmx->pi_desc);
+
r = pi_test_and_set_on(&vmx->pi_desc);
kvm_make_request(KVM_REQ_EVENT, vcpu);
 #ifdef CONFIG_SMP
-   if (!r && (vcpu->mode == IN_GUEST_MODE))
+   if (!r && !sn && (vcpu->mode == IN_GUEST_MODE))
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
else
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 21/25] KVM: Update Posted-Interrupts descriptor during vCPU scheduling

2014-12-02 Thread Feng Wu
Update Posted-Interrupts descriptor according to the
following rules:
- Before vCPU block, set 'NV' to POSTED_INTR_WAKEUP_VECTOR
- After vCPU block, set 'NV' back to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |5 ++
 arch/x86/kvm/vmx.c  |   83 +++
 arch/x86/kvm/x86.c  |   16 +++
 virt/kvm/kvm_main.c |   11 +
 4 files changed, 115 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6878429..2fd85a5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -775,6 +775,8 @@ struct kvm_x86_ops {
 
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+   int (*vcpu_pre_block)(struct kvm_vcpu *vcpu);
+   void (*vcpu_post_block)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1100,4 +1102,7 @@ void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 bool kvm_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
 
+int kvm_arch_vcpu_pre_block(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_post_block(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 81f239b..a1966b9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9147,6 +9147,86 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
 }
 
+static int vmx_vcpu_pre_block(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old;
+   struct pi_desc new;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   memset(&old, 0, sizeof(old));
+   memset(&new, 0, sizeof(new));
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   /*
+* A posted-interrupt happened in the one of the
+* following two cases:
+* 1. After the latest pir-to-virr sync operation
+* in kvm_arch_vcpu_runnable() function
+* 2. In this do-while() loop, a posted-interrupt
+* occurs.
+*
+* For either of above cases, we should not block
+* the VCPU.
+*/
+   if (pi_test_on(pi_desc) == 1) {
+   /*
+* Need to set this flag, then the inject will
+* be synced from PIR to vIRR before VM-ENTRY.
+* In fact, for guest IPI case, in function
+* vmx_deliver_posted_interrupt(), this flags
+* has already been set, but if the interrupt
+* is injected by VT-d PI hardware, we need
+* to set this.
+*/
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   return 1;
+   }
+
+   pi_clear_sn(&new);
+
+   /* set 'NV' to 'wakeup vector' */
+   new.nv = POSTED_INTR_WAKEUP_VECTOR;
+   } while (cmpxchg(&pi_desc->control, old.control, new.control)
+   != old.control);
+
+   return 0;
+}
+
+static void vmx_vcpu_post_block(struct kvm_vcpu *vcpu)
+{
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old;
+   struct pi_desc new;
+   unsigned int dest = 0;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return;
+
+   pi_set_sn(pi_desc);
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   dest = cpu_physical_id(vcpu->cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   } while (cmpxchg(&pi_desc->control, old.control, new.control)
+   != old.control);
+
+   pi_clear_sn(pi_desc);
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -9256,6 +9336,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.sched_in = vmx_sched_in,
 
.get_pi_desc_addr = vmx_get_pi_desc_addr,
+
+   .vcpu_pre_block = vmx_vcpu_pre_block,
+   .vcpu_post_block = vmx_vcpu_post_block,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..9706984 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7731,6 +7731,22 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)

[v2 22/25] KVM: Change NDST field after vCPU scheduling

2014-12-02 Thread Feng Wu
This patch changes the NDST field of the Posted-Interrupts
Descriptor after a vCPU is scheduled to another physical
CPU.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   25 +
 1 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a1966b9..e71bf3b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1906,6 +1906,31 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+   if (irq_remapping_cap(IRQ_POSTING_CAP) && (vcpu->cpu != cpu)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   memset(&old, 0, sizeof(old));
+   memset(&new, 0, sizeof(new));
+
+   pi_set_sn(pi_desc);
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+
+   } while (cmpxchg(&pi_desc->control, old.control,
+   new.control) != old.control);
+   pi_clear_sn(pi_desc);
+   }
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 19/25] KVM: x86: kvm-vfio: VT-d posted-interrupts setup

2014-12-02 Thread Feng Wu
This patch defines the macro __KVM_HAVE_ARCH_KVM_VFIO_POSTING and
implements kvm_arch_vfio_update_pi_irte for the x86 architecture.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |2 +
 arch/x86/kvm/Makefile   |2 +-
 arch/x86/kvm/kvm_vfio_x86.c |   68 +++
 3 files changed, 71 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/kvm_vfio_x86.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9b45b78..6878429 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -82,6 +82,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, 
int level)
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
+#define __KVM_HAVE_ARCH_KVM_VFIO_POSTING
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 25d22b2..8809d58 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@ kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)   += 
$(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
 
 kvm-y  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-  i8254.o cpuid.o pmu.o
+  i8254.o cpuid.o pmu.o kvm_vfio_x86.o
 kvm-intel-y+= vmx.o
 kvm-amd-y  += svm.o
 
diff --git a/arch/x86/kvm/kvm_vfio_x86.c b/arch/x86/kvm/kvm_vfio_x86.c
new file mode 100644
index 000..c59a31a
--- /dev/null
+++ b/arch/x86/kvm/kvm_vfio_x86.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 Intel Corporation.
+ * Authors: Feng Wu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+
+/*
+ * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+uint32_t guest_irq)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*/
+
+   kvm_set_msi_irq(e, &irq);
+   if (!kvm_find_dest_vcpu(kvm, &irq, &vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (irq_set_vcpu_affinity(host_irq, &vcpu_info) < 0) {
+   printk(KERN_INFO "%s: failed to update PI IRTE\n",
+   __func__);
+   ret = -EINVAL;
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+   return ret;
+}
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 17/25] KVM: kvm-vfio: User API for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
This patch adds and documents a new attribute
KVM_DEV_VFIO_DEVICE_POSTING_IRQ in KVM_DEV_VFIO_DEVICE group.
This new attribute is used for VT-d Posted-Interrupts.

When the guest OS changes the interrupt configuration for an
assigned device, such as the MSI/MSI-X data/address fields,
QEMU will use this IRQ attribute to tell KVM to update the
related IRTE according to the VT-d Posted-Interrupts Specification;
for example, the guest vector should be updated in the related IRTE.

Signed-off-by: Feng Wu 
---
 Documentation/virtual/kvm/devices/vfio.txt |9 +
 include/uapi/linux/kvm.h   |   10 ++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/vfio.txt 
b/Documentation/virtual/kvm/devices/vfio.txt
index f7aff29..41e12b7 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -42,3 +42,12 @@ activated before VFIO_DEVICE_SET_IRQS has been called to 
trigger the IRQ
 or associate an eventfd to it. Unforwarding can only be called while the
 signaling has been disabled with VFIO_DEVICE_SET_IRQS. If this condition is
 not satisfied, the command returns an -EBUSY.
+
+  KVM_DEV_VFIO_DEVICE_POSTING_IRQ: Use posted interrupts mechanism to post
+   the IRQ to guests.
+For this attribute, kvm_device_attr.addr points to a kvm_vfio_dev_irq struct.
+
+When guest OS changes the interrupt configuration for an assigned device,
+such as, MSI/MSIx data/address fields, QEMU will use this IRQ attribute
+to tell KVM to update the related IRTE according to the VT-d Posted-Interrupts
+Specification, such as, the guest vector should be updated in the related IRTE.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a269a42..7d98650 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_DEVICE   2
 #define   KVM_DEV_VFIO_DEVICE_FORWARD_IRQ  1
 #define   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ2
+#define   KVM_DEV_VFIO_DEVICE_POSTING_IRQ  3
 
 enum kvm_device_type {
KVM_DEV_TYPE_FSL_MPIC_20= 1,
@@ -973,6 +974,15 @@ struct kvm_arch_forwarded_irq {
__u32 gsi; /* gsi, ie. virtual IRQ number */
 };
 
+struct kvm_vfio_dev_irq {
+   __u32   argsz;
+   __u32   fd; /* file descriptor of the VFIO device */
+   __u32   index;  /* VFIO device IRQ index */
+   __u32   start;
+   __u32   count;
+   __u32   gsi[];  /* gsi, ie. virtual IRQ number */
+};
+
 /*
  * ioctls for VM fds
  */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 20/25] x86, irq: Define a global vector for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
Currently, we use a global vector as the Posted-Interrupts
Notification Event for all the vCPUs in the system. We need
to introduce another global vector for VT-d Posted-Interrupts,
which will be used to wake up a sleeping vCPU when an external
interrupt from a direct-assigned device arrives for that vCPU.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/entry_arch.h  |2 ++
 arch/x86/include/asm/hardirq.h |1 +
 arch/x86/include/asm/hw_irq.h  |2 ++
 arch/x86/include/asm/irq_vectors.h |1 +
 arch/x86/kernel/entry_64.S |2 ++
 arch/x86/kernel/irq.c  |   27 +++
 arch/x86/kernel/irqinit.c  |2 ++
 7 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/entry_arch.h 
b/arch/x86/include/asm/entry_arch.h
index dc5fa66..27ca0af 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -23,6 +23,8 @@ BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 #ifdef CONFIG_HAVE_KVM
 BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
 smp_kvm_posted_intr_ipi)
+BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
+smp_kvm_posted_intr_wakeup_ipi)
 #endif
 
 /*
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b..9866065 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,6 +14,7 @@ typedef struct {
 #endif
 #ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
+   unsigned int kvm_posted_intr_wakeup_ipis;
 #endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index e7ae6eb..38fac9b 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,7 @@
 extern asmlinkage void apic_timer_interrupt(void);
 extern asmlinkage void x86_platform_ipi(void);
 extern asmlinkage void kvm_posted_intr_ipi(void);
+extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
 extern asmlinkage void error_interrupt(void);
 extern asmlinkage void irq_work_interrupt(void);
 
@@ -92,6 +93,7 @@ extern void trace_call_function_single_interrupt(void);
 #define trace_irq_move_cleanup_interrupt  irq_move_cleanup_interrupt
 #define trace_reboot_interrupt  reboot_interrupt
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
 #endif /* CONFIG_TRACING */
 
 struct irq_domain;
diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index b26cb12..dca94f2 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -105,6 +105,7 @@
 /* Vector for KVM to deliver posted interrupt IPI */
 #ifdef CONFIG_HAVE_KVM
 #define POSTED_INTR_VECTOR 0xf2
+#define POSTED_INTR_WAKEUP_VECTOR  0xf1
 #endif
 
 /*
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e61c14a..a598447 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -960,6 +960,8 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 #ifdef CONFIG_HAVE_KVM
 apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
+   kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
 #endif
 
 #ifdef CONFIG_X86_MCE_THRESHOLD
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 922d285..47408c3 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -237,6 +237,9 @@ __visible void smp_x86_platform_ipi(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_HAVE_KVM
+void (*wakeup_handler_callback)(void) = NULL;
+EXPORT_SYMBOL_GPL(wakeup_handler_callback);
+
 /*
  * Handler for POSTED_INTERRUPT_VECTOR.
  */
@@ -256,6 +259,30 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs 
*regs)
 
set_irq_regs(old_regs);
 }
+
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
+{
+   struct pt_regs *old_regs = set_irq_regs(regs);
+
+   ack_APIC_irq();
+
+   irq_enter();
+
+   exit_idle();
+
+   inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+
+   if (wakeup_handler_callback)
+   wakeup_handler_callback();
+
+   irq_exit();
+
+   set_irq_regs(old_regs);
+}
+
 #endif
 
 __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 70e181e..844673c 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -144,6 +144,8 @@ static void __init apic_intr_init(void)
 #ifdef CONFIG_HAVE_KVM
/* IPI for KVM to deliver posted interrupt */
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+   /* IPI for KVM to deliver interrupt to wake up tasks */
+   alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR

[v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
This patch adds the kvm-vfio interface for VT-d Posted-Interrupts.
When a guest updates the MSI/MSI-X information for an assigned device,
QEMU will use the KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to set up
the IRTE for VT-d PI. This patch implements this IRQ attribute.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_host.h |   19 
 virt/kvm/vfio.c  |  103 ++
 2 files changed, 122 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5cd4420..8d06678 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1134,6 +1134,25 @@ static inline int kvm_arch_vfio_set_forward(struct 
kvm_fwd_irq *fwd_irq,
 }
 #endif
 
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
+/*
+ * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+uint32_t guest_irq);
+#else
+static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq)
+{
+   return 0;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 6bc7001..5e5515f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -446,6 +446,99 @@ out:
return ret;
 }
 
+static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
+{
+   if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
+   u8 pin;
+
+   pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+   if (pin)
+   return 1;
+   } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
+   return pci_msi_vec_count(pdev);
+   else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
+   return pci_msix_vec_count(pdev);
+
+   return 0;
+}
+
+static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
+{
+   struct kvm_vfio_dev_irq pi_info;
+   uint32_t *gsi;
+   unsigned long minsz;
+   struct vfio_device *vdev;
+   struct msi_desc *entry;
+   struct device *dev;
+   struct pci_dev *pdev;
+   int i, max, ret;
+
+   minsz = offsetofend(struct kvm_vfio_dev_irq, count);
+
+   if (copy_from_user(&pi_info, (void __user *)argp, minsz))
+   return -EFAULT;
+
+   if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS)
+   return -EINVAL;
+
+   vdev = kvm_vfio_get_vfio_device(pi_info.fd);
+   if (IS_ERR(vdev))
+   return PTR_ERR(vdev);
+
+   dev = kvm_vfio_external_base_device(vdev);
+   if (!dev || !dev_is_pci(dev)) {
+   ret = -EFAULT;
+   goto put_vfio_device;
+   }
+
+   pdev = to_pci_dev(dev);
+
+   max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
+   if (max <= 0) {
+   ret = -EFAULT;
+   goto put_vfio_device;
+   }
+
+   if (pi_info.argsz - minsz < pi_info.count * sizeof(int) ||
+   pi_info.start >= max || pi_info.start + pi_info.count > max) {
+   ret = -EINVAL;
+   goto put_vfio_device;
+   }
+
+   gsi = memdup_user((void __user *)((unsigned long)argp + minsz),
+  pi_info.count * sizeof(int));
+   if (IS_ERR(gsi)) {
+   ret = PTR_ERR(gsi);
+   goto put_vfio_device;
+   }
+
+#ifdef CONFIG_PCI_MSI
+   for (i = 0; i < pi_info.count; i++) {
+   list_for_each_entry(entry, &pdev->msi_list, list) {
+   if (entry->msi_attrib.entry_nr != pi_info.start+i)
+   continue;
+
+   ret = kvm_arch_vfio_update_pi_irte(kdev->kvm,
+  entry->irq,
+  gsi[i]);
+   if (ret) {
+   ret = -EFAULT;
+   goto free_gsi;
+   }
+   }
+   }
+#endif
+
+   ret = 0;
+
+free_gsi:
+   kfree(gsi);
+
+put_vfio_device:
+   kvm_vfio_put_vfio_device(vdev);
+   return ret;
+}
+
 static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
 {
int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
@@ -456,6 +549,11 @@ static int kvm_vfio_set_device(struct kvm_device *kdev, 
long attr, u64 arg)
case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
break;
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
+   case KVM_DEV_VFIO_DEVICE_POSTING_IRQ:
+   ret = kv

[v2 11/25] KVM: Add some helper functions for Posted-Interrupts

2014-12-02 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   26 ++
 1 files changed, 26 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index abdb84f..0b1383e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -408,6 +408,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -443,6 +445,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 15/25] KVM: Make struct kvm_irq_routing_table accessible

2014-12-02 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_host.h |   19 +++
 virt/kvm/irqchip.c   |   11 ---
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0b9659d..cfa85ac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -335,6 +335,25 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   struct kvm_kernel_irq_routing_entry *rt_entries;
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#else
+
+struct kvm_irq_routing_table {};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 7f256f3..cdf29a6 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,17 +31,6 @@
 #include 
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   struct kvm_kernel_irq_routing_entry *rt_entries;
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 14/25] KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu

2014-12-02 Thread Feng Wu
Define an interface to get the PI descriptor address from the vCPU structure.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/vmx.c  |   12 
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7a41808..9b45b78 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -772,6 +772,7 @@ struct kvm_x86_ops {
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+   u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 66ca275..81f239b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -562,6 +562,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
@@ -4298,6 +4303,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
*vcpu)
return;
 }
 
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+   return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -9244,6 +9254,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_nested_events = vmx_check_nested_events,
 
.sched_in = vmx_sched_in,
+
+   .get_pi_desc_addr = vmx_get_pi_desc_addr,
 };
 
 static int __init vmx_init(void)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 12/25] KVM: Initialize VT-d Posted-Interrupts Descriptor

2014-12-02 Thread Feng Wu
This patch initializes the VT-d Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   27 +++
 1 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0b1383e..66ca275 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 
@@ -4433,6 +4434,30 @@ static void ept_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+static void pi_desc_init(struct vcpu_vmx *vmx)
+{
+   unsigned int dest;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return;
+
+   /*
+* Initialize Posted-Interrupt Descriptor
+*/
+
+   pi_clear_sn(&vmx->pi_desc);
+   vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+
+   /* Physical mode for Notification Event */
+   vmx->pi_desc.ndm = 0;
+   dest = cpu_physical_id(vmx->vcpu.cpu);
+
+   if (x2apic_enabled())
+   vmx->pi_desc.ndst = dest;
+   else
+   vmx->pi_desc.ndst = (dest << 8) & 0xFF00;
+}
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4476,6 +4501,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
+
+   pi_desc_init(vmx);
}
 
if (ple_gap) {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 09/25] iommu, x86: define irq_remapping_cap()

2014-12-02 Thread Feng Wu
This patch adds a new interface irq_remapping_cap() to detect
whether irq remapping supports new features, such as VT-d
Posted-Interrupts. We export this function out, so that KVM
code can check this and use this mechanism properly.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 arch/x86/include/asm/irq_remapping.h |2 ++
 drivers/iommu/irq_remapping.c|   12 
 2 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index f87ac70..b3ad067 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -37,6 +37,7 @@ enum irq_remap_cap {
 
 extern void setup_irq_remapping_ops(void);
 extern int irq_remapping_supported(void);
+extern bool irq_remapping_cap(enum irq_remap_cap cap);
 extern void set_irq_remapping_broken(void);
 extern int irq_remapping_prepare(void);
 extern int irq_remapping_enable(void);
@@ -69,6 +70,7 @@ struct vcpu_data {
 
 static inline void setup_irq_remapping_ops(void) { }
 static inline int irq_remapping_supported(void) { return 0; }
+static bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
 static inline void set_irq_remapping_broken(void) { }
 static inline int irq_remapping_prepare(void) { return -ENODEV; }
 static inline int irq_remapping_enable(void) { return -ENODEV; }
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index e63e969..b008663 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -103,6 +103,18 @@ int irq_remapping_supported(void)
return remap_ops->supported();
 }
 
+bool irq_remapping_cap(enum irq_remap_cap cap)
+{
+   if (disable_irq_post)
+   return 0;
+
+   if (!remap_ops || !remap_ops->capability)
+   return 0;
+
+   return remap_ops->capability(cap);
+}
+EXPORT_SYMBOL_GPL(irq_remapping_cap);
+
 int __init irq_remapping_prepare(void)
 {
if (!remap_ops || !remap_ops->prepare)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 13/25] KVM: Define a new interface kvm_find_dest_vcpu() for VT-d PI

2014-12-02 Thread Feng Wu
This patch defines a new interface, kvm_find_dest_vcpu(), for
VT-d PI, which returns the destination vCPU of an
interrupt for guests.

Since VT-d PI cannot handle broadcast/multicast interrupts,
we only handle Fixed and Lowest priority interrupts here.

The current method of handling guest lowest priority interrupts
is to use a counter 'apic_arb_prio' for each vCPU: we choose the
vCPU with the smallest 'apic_arb_prio' and then increase it by 1.
However, for VT-d PI, we cannot re-use this, since we no longer
have control over 'apic_arb_prio' when posted interrupts are
delivered directly by hardware.

Here, we introduce a mechanism similar to 'apic_arb_prio' to handle
guest lowest priority interrupts when VT-d PI is used. The idea
is as follows:
- Each vCPU has a counter 'round_robin_counter'.
- When a guest sets an interrupt to lowest priority, we choose
the vCPU with the smallest 'round_robin_counter' as the destination,
then increase it.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |4 +++
 virt/kvm/irq_comm.c |   41 +++
 2 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..7a41808 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
struct kvm_lapic *apic;/* kernel irqchip context */
unsigned long apic_attention;
int32_t apic_arb_prio;
+   int32_t round_robin_counter;
int mp_state;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
@@ -1093,4 +1094,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, 
u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
+bool kvm_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+   struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 963b899..f3c5d69 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -317,6 +317,47 @@ out:
return r;
 }
 
+int kvm_compare_rr_counter(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+   return vcpu1->arch.round_robin_counter -
+   vcpu2->arch.round_robin_counter;
+}
+
+bool kvm_find_dest_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+   struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu, *dest = NULL;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   if (!kvm_is_dm_lowest_prio(irq)) {
+   r++;
+   *dest_vcpu = vcpu;
+   } else if (kvm_lapic_enabled(vcpu)) {
+   if (!dest)
+   dest = vcpu;
+   else if (kvm_compare_rr_counter(vcpu, dest) < 0)
+   dest = vcpu;
+   }
+   }
+
+   if (dest) {
+   dest->arch.round_robin_counter++;
+   *dest_vcpu = dest;
+   return true;
+   } else if (r == 1)
+   return true;
+
+   return false;
+}
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 10/25] KVM: change struct pi_desc for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
Change struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c |   15 +--
 1 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..abdb84f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -411,8 +411,19 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   u64 on  : 1,
+   sn  : 1,
+   rsvd_1  : 13,
+   ndm : 1,
+   nv  : 8,
+   rsvd_2  : 8,
+   ndst: 32;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 08/25] iommu, x86: Add intel_irq_remapping_capability() for Intel

2014-12-02 Thread Feng Wu
Add the Intel side implementation for capability in
struct irq_remap_ops.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 drivers/iommu/intel_irq_remapping.c |   27 +++
 drivers/iommu/irq_remapping.c   |2 ++
 drivers/iommu/irq_remapping.h   |4 
 3 files changed, 33 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index 01786a8..827aeff 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -651,6 +651,32 @@ error:
return -1;
 }
 
+static bool intel_irq_remapping_capability(enum irq_remap_cap cap)
+{
+   struct dmar_drhd_unit *drhd;
+   struct intel_iommu *iommu;
+
+   switch (cap) {
+   case IRQ_POSTING_CAP:
+   /*
+* If 1) posted-interrupts is disabled by user
+* or 2) irq remapping is disabled, posted-interrupts
+* is not supported.
+*/
+   if (disable_irq_post || !irq_remapping_enabled)
+   return 0;
+
+   for_each_iommu(iommu, drhd)
+   if (!cap_pi_support(iommu->cap))
+   return 0;
+
+   return 1;
+   default:
+   pr_warn("Unknown irq remapping capability.\n");
+   return 0;
+   }
+}
+
 static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
   struct intel_iommu *iommu,
   struct acpi_dmar_hardware_unit *drhd)
@@ -947,6 +973,7 @@ static struct irq_domain *intel_get_irq_domain(struct 
irq_alloc_info *info)
 
 struct irq_remap_ops intel_irq_remap_ops = {
.supported  = intel_irq_remapping_supported,
+   .capability = intel_irq_remapping_capability,
.prepare= dmar_table_init,
.enable = intel_enable_irq_remapping,
.disable= disable_irq_remapping,
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 3c3da04..e63e969 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -24,6 +24,8 @@ int irq_remap_broken;
 int disable_sourceid_checking;
 int no_x2apic_optout;
 
+int disable_irq_post = 1;
+
 static struct irq_remap_ops *remap_ops;
 
 static void irq_remapping_disable_io_apic(void)
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 2d991b2..cb1f46d 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -36,6 +36,8 @@ extern int disable_sourceid_checking;
 extern int no_x2apic_optout;
 extern int irq_remapping_enabled;
 
+extern int disable_irq_post;
+
 struct irq_remap_ops {
/* Check whether Interrupt Remapping is supported */
int (*supported)(void);
@@ -76,6 +78,8 @@ extern void ir_ack_apic_edge(struct irq_data *data);
 #define disable_irq_remap 1
 #define irq_remap_broken  0
 
+#define disable_irq_post  1
+
 #endif /* CONFIG_IRQ_REMAP */
 
 #endif /* __IRQ_REMAPPING_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 07/25] iommu, x86: Add cap_pi_support() to detect VT-d PI capability

2014-12-02 Thread Feng Wu
Add helper function to detect VT-d Posted-Interrupts capability.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 include/linux/intel-iommu.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ecaf3a9..8174ae8 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 /*
  * Decoding Capability Register
  */
+#define cap_pi_support(c)  (((c) >> 59) & 1)
 #define cap_read_drain(c)  (((c) >> 55) & 1)
 #define cap_write_drain(c) (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 04/25] iommu, x86: Implement irq_set_vcpu_affinity for intel_ir_chip

2014-12-02 Thread Feng Wu
Implement irq_set_vcpu_affinity for intel_ir_chip.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 arch/x86/include/asm/irq_remapping.h |5 +
 drivers/iommu/intel_irq_remapping.c  |   27 +++
 2 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index f67ae08..f87ac70 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -60,6 +60,11 @@ static inline struct irq_domain 
*arch_get_ir_parent_domain(void)
return x86_vector_domain;
 }
 
+struct vcpu_data {
+   u64 pi_desc_addr;   /* Physical address of PI Descriptor */
+   u32 vector; /* Guest vector of the interrupt */
+};
+
 #else  /* CONFIG_IRQ_REMAP */
 
 static inline void setup_irq_remapping_ops(void) { }
diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index f6da3b2..749cb93 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1010,10 +1010,37 @@ static void intel_ir_compose_msi_msg(struct irq_data 
*irq_data,
*msg = ir_data->msi_entry;
 }
 
+static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+{
+   struct intel_ir_data *ir_data = data->chip_data;
+   struct irte *irte = &ir_data->irte_entry;
+   struct irte_pi *irte_pi = (struct irte_pi *)irte;
+   struct vcpu_data *vcpu_pi_info = (struct vcpu_data *)vcpu_info;
+
+   irte_pi->urg = 0;
+   irte_pi->vector = vcpu_pi_info->vector;
+   irte_pi->pda_l = (vcpu_pi_info->pi_desc_addr >> (32 - PDA_LOW_BIT)) &
+~(-1UL << PDA_LOW_BIT);
+   irte_pi->pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
+~(-1UL << PDA_HIGH_BIT);
+
+   irte_pi->__reserved_1 = 0;
+   irte_pi->__reserved_2 = 0;
+   irte_pi->__reserved_3 = 0;
+   irte_pi->__reserved_4 = 0;
+
+   irte_pi->pst = 1;
+
+   modify_irte(&ir_data->irq_2_iommu, irte);
+
+   return 0;
+}
+
 static struct irq_chip intel_ir_chip = {
.irq_ack = ir_ack_apic_edge,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
+   .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
 };
 
 static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 06/25] iommu, x86: No need to migrating irq for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
We don't need to migrate the irqs for VT-d Posted-Interrupts here.
When 'pst' is set in the IRTE, the associated irq will be posted to
guests instead of being remapped. The destination of the
interrupt is set in the Posted-Interrupts Descriptor, and the migration
happens during vCPU scheduling.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 drivers/iommu/intel_irq_remapping.c |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index 749cb93..01786a8 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -976,10 +976,20 @@ intel_ir_set_affinity(struct irq_data *data, const struct 
cpumask *mask,
 {
struct intel_ir_data *ir_data = data->chip_data;
struct irte *irte = &ir_data->irte_entry;
+   struct irte_pi *irte_pi = (struct irte_pi *)irte;
struct irq_cfg *cfg = irqd_cfg(data);
struct irq_data *parent = data->parent_data;
int ret;
 
+   /*
+* If the interrupt is for posting, it is used by guests,
+* we cannot set irq affinity here.
+*/
+   if (irte_pi->pst == 1) {
+   pr_warn("cannot set irq affinity for posted-interrupts\n");
+   return -EBUSY;
+   }
+
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 03/25] iommu, x86: Define new irte structure for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
Add a new irte_pi structure for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 include/linux/dmar.h |   32 
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 8473756..c7f9cda 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -212,6 +212,38 @@ struct irte {
};
 };
 
+struct irte_pi {
+   union {
+   struct {
+   __u64   present : 1,
+   fpd : 1,
+   __reserved_1: 6,
+   avail   : 4,
+   __reserved_2: 2,
+   urg : 1,
+   pst : 1,
+   vector  : 8,
+   __reserved_3: 14,
+   pda_l   : 26;
+   };
+   __u64 low;
+   };
+
+   union {
+   struct {
+   __u64   sid : 16,
+   sq  : 2,
+   svt : 2,
+   __reserved_4: 12,
+   pda_h   : 32;
+   };
+   __u64 high;
+   };
+};
+
+#define PDA_LOW_BIT26
+#define PDA_HIGH_BIT   32
+
 enum {
IRQ_REMAP_XAPIC_MODE,
IRQ_REMAP_X2APIC_MODE,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 05/25] x86, irq: Implement irq_set_vcpu_affinity for pci_msi_ir_controller

2014-12-02 Thread Feng Wu
Implement irq_set_vcpu_affinity for pci_msi_ir_controller.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 arch/x86/kernel/apic/msi.c |1 +
 include/linux/irq.h|3 +++
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index da163da..b0ed073 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -152,6 +152,7 @@ static struct irq_chip pci_msi_ir_controller = {
.irq_mask   = pci_msi_mask_irq,
.irq_ack= irq_chip_ack_parent,
.irq_retrigger  = irq_chip_retrigger_hierarchy,
+   .irq_set_vcpu_affinity  = irq_chip_set_vcpu_affinity_parent,
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 83abafc..5dcaa7f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -464,6 +464,9 @@ extern void irq_chip_eoi_parent(struct irq_data *data);
 extern int irq_chip_set_affinity_parent(struct irq_data *data,
const struct cpumask *dest,
bool force);
+extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data,
+void *vcpu_info);
+
 #endif
 
 static inline void irq_chip_write_msi_msg(struct irq_data *data,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 02/25] iommu: Add new member capability to struct irq_remap_ops

2014-12-02 Thread Feng Wu
This patch adds a new member, capability, to struct irq_remap_ops.
This new function op can be used to check whether some
features are supported, such as VT-d Posted-Interrupts.

Signed-off-by: Feng Wu 
Reviewed-by: Jiang Liu 
---
 arch/x86/include/asm/irq_remapping.h |4 
 drivers/iommu/irq_remapping.h|4 
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index 6ba2431..f67ae08 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -31,6 +31,10 @@ struct irq_alloc_info;
 
 #ifdef CONFIG_IRQ_REMAP
 
+enum irq_remap_cap {
+   IRQ_POSTING_CAP = 0,
+};
+
 extern void setup_irq_remapping_ops(void);
 extern int irq_remapping_supported(void);
 extern void set_irq_remapping_broken(void);
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 4bd791d..2d991b2 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -28,6 +28,7 @@ struct irq_data;
 struct msi_msg;
 struct irq_domain;
 struct irq_alloc_info;
+enum irq_remap_cap;
 
 extern int disable_irq_remap;
 extern int irq_remap_broken;
@@ -39,6 +40,9 @@ struct irq_remap_ops {
/* Check whether Interrupt Remapping is supported */
int (*supported)(void);
 
+   /* Check some capability is supported */
+   bool (*capability)(enum irq_remap_cap);
+
/* Initializes hardware and makes it ready for remapping interrupts */
int  (*prepare)(void);
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 01/25] genirq: Introduce irq_set_vcpu_affinity() to target an interrupt to a VCPU

2014-12-02 Thread Feng Wu
From: Jiang Liu 

With Posted-Interrupts support in Intel CPU and IOMMU, an external
interrupt from assigned-devices could be directly delivered to a
virtual CPU in a virtual machine. Instead of hacking KVM and Intel
IOMMU drivers, we propose a platform independent interface to target
an interrupt to a specific virtual CPU in a virtual machine, or set
virtual CPU affinity for an interrupt.

By adopting this new interface and the hierarchy irqdomain, we could
easily support posted-interrupts on Intel platforms, and also provide
flexible enough interfaces for other platforms to support similar
features.

We may also cooperate between set_affinity() and set_vcpu_affinity()
in IRQ core or irq chip drivers.

Here is the usage scenario for this interface:
Guest updates MSI/MSI-X interrupt configuration
-->QEMU and KVM handle this
-->KVM calls this interface (passing posted interrupts descriptor
   and guest vector)
-->irq core will transfer the control to IOMMU
-->IOMMU will do the real work of updating IRTE (IRTE has new
   format for VT-d Posted-Interrupts)

Signed-off-by: Jiang Liu 
Signed-off-by: Feng Wu 
---
 include/linux/irq.h |4 
 kernel/irq/chip.c   |   14 ++
 kernel/irq/manage.c |   20 
 3 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index f26e736..83abafc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -324,6 +324,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data 
*d)
  * irq_request_resources
  * @irq_compose_msi_msg:   optional to compose message content for MSI
  * @irq_write_msi_msg: optional to write message content for MSI
+ * @irq_set_vcpu_affinity: optional to target a virtual CPU in a virtual
+ * machine
  * @flags: chip specific flags
  */
 struct irq_chip {
@@ -362,6 +364,7 @@ struct irq_chip {
 
void(*irq_compose_msi_msg)(struct irq_data *data, struct 
msi_msg *msg);
void(*irq_write_msi_msg)(struct irq_data *data, struct 
msi_msg *msg);
+   int (*irq_set_vcpu_affinity)(struct irq_data *data, void 
*vcpu_info);
 
unsigned long   flags;
 };
@@ -416,6 +419,7 @@ extern void irq_cpu_online(void);
 extern void irq_cpu_offline(void);
 extern int irq_set_affinity_locked(struct irq_data *data,
   const struct cpumask *cpumask, bool force);
+extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6f1c7a5..fe0908f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -948,6 +948,20 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data)
 
return -ENOSYS;
 }
+
+/**
+ * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent 
interrupt
+ * @data:  Pointer to interrupt specific data
+ * @dest:  The vcpu affinity information
+ */
+int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info)
+{
+   data = data->parent_data;
+   if (data->chip->irq_set_vcpu_affinity)
+   return data->chip->irq_set_vcpu_affinity(data, vcpu_info);
+
+   return -ENOSYS;
+}
 #endif
 
 /**
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8069237..bd3a1ba 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -247,6 +247,26 @@ int irq_set_affinity_hint(unsigned int irq, const struct 
cpumask *m)
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
 
+int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info)
+{
+   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_chip *chip;
+   unsigned long flags;
+   int ret = -ENOSYS;
+
+   if (!desc)
+   return -EINVAL;
+
+   raw_spin_lock_irqsave(&desc->lock, flags);
+   chip = desc->irq_data.chip;
+   if (chip && chip->irq_set_vcpu_affinity)
+   ret = chip->irq_set_vcpu_affinity(irq_desc_get_irq_data(desc),
+ vcpu_info);
+   raw_spin_unlock_irqrestore(&desc->lock, flags);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity);
+
 static void irq_affinity_notify(struct work_struct *work)
 {
struct irq_affinity_notify *notify =
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v2 00/25] Add VT-d Posted-Interrupts support

2014-12-03 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

v1->v2:
* Use VFIO framework to enable this feature; the VFIO part of this series is
  based on Eric's patch "[PATCH v3 0/8] KVM-VFIO IRQ forward control"
* Rebase this patchset on 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git,
  then revise some irq logic based on the new hierarchy irqdomain patches 
provided
  by Jiang Liu 

This patch series is made of the following groups:
1-6: Some preparation changes in iommu and irq component, this is based on the
 new hierarchy irqdomain logic.
7-9, 25: IOMMU changes for VT-d Posted-Interrupts, such as, feature detection,
  command line parameter.
10-16, 21-24: Changes related to KVM itself.
17-19: Changes in VFIO component, this part was previously sent out as
"[RFC PATCH v2 0/2] kvm-vfio: implement the vfio skeleton for VT-d 
Posted-Interrupts"
20: x86 irq related changes

Feng Wu (25):
  genirq: Introduce irq_set_vcpu_affinity() to target an interrupt to a
VCPU
  iommu: Add new member capability to struct irq_remap_ops
  iommu, x86: Define new irte structure for VT-d Posted-Interrupts
  iommu, x86: Implement irq_set_vcpu_affinity for intel_ir_chip
  x86, irq: Implement irq_set_vcpu_affinity for pci_msi_ir_controller
  iommu, x86: No need to migrating irq for VT-d Posted-Interrupts
  iommu, x86: Add cap_pi_support() to detect VT-d PI capability
  iommu, x86: Add intel_irq_remapping_capability() for Intel
  iommu, x86: define irq_remapping_cap()
  KVM: change struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Initialize VT-d Posted-Interrupts Descriptor
  KVM: Define a new interface kvm_find_dest_vcpu() for VT-d PI
  KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  KVM: kvm-vfio: User API for VT-d Posted-Interrupts
  KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts
  KVM: x86: kvm-vfio: VT-d posted-interrupts setup
  x86, irq: Define a global vector for VT-d Posted-Interrupts
  KVM: Update Posted-Interrupts descriptor during vCPU scheduling
  KVM: Change NDST field after vCPU scheduling
  KVM: Add the handler for Wake-up Vector
  KVM: Suppress posted-interrupt when 'SN' is set
  iommu/vt-d: Add a command line parameter for VT-d posted-interrupts

 Documentation/kernel-parameters.txt|1 +
 Documentation/virtual/kvm/devices/vfio.txt |9 +
 arch/x86/include/asm/entry_arch.h  |2 +
 arch/x86/include/asm/hardirq.h |1 +
 arch/x86/include/asm/hw_irq.h  |2 +
 arch/x86/include/asm/irq_remapping.h   |   11 ++
 arch/x86/include/asm/irq_vectors.h |1 +
 arch/x86/include/asm/kvm_host.h|   14 ++
 arch/x86/kernel/apic/msi.c |1 +
 arch/x86/kernel/entry_64.S |2 +
 arch/x86/kernel/irq.c  |   27 +++
 arch/x86/kernel/irqinit.c  |2 +
 arch/x86/kvm/Makefile  |2 +-
 arch/x86/kvm/kvm_vfio_x86.c|   68 
 arch/x86/kvm/vmx.c |  251 +++-
 arch/x86/kvm/x86.c |   38 -
 drivers/iommu/intel_irq_remapping.c|   64 +++
 drivers/iommu/irq_remapping.c  |   24 +++-
 drivers/iommu/irq_remapping.h  |8 +
 include/linux/dmar.h   |   32 
 include/linux/intel-iommu.h|1 +
 include/linux/irq.h|7 +
 include/linux/kvm_host.h   |   43 +
 include/uapi/linux/kvm.h   |   10 +
 kernel/irq/chip.c  |   14 ++
 kernel/irq/manage.c|   20 +++
 virt/kvm/irq_comm.c|   43 +-
 virt/kvm/irqchip.c |   11 --
 virt/kvm/kvm_main.c|   14 ++
 virt/kvm/vfio.c|  103 
 30 files changed, 799 insertions(+), 27 deletions(-)
 create mode 100644 arch/x86/kvm/kvm_vfio_x86.c

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] KVM: Add SMAP support when setting CR4

2014-03-26 Thread Feng Wu
This patch adds SMAP handling logic when setting CR4 for guests

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.h |  8 
 arch/x86/kvm/mmu.c   | 22 +++---
 arch/x86/kvm/mmu.h   |  2 ++
 arch/x86/kvm/x86.c   |  6 ++
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f1e4895..63124a2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu 
*vcpu)
return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+   struct kvm_cpuid_entry2 *best;
+
+   best = kvm_find_cpuid_entry(vcpu, 7, 0);
+   return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef..33e656c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3591,14 +3591,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu 
*vcpu,
}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
 {
unsigned bit, byte, pfec;
u8 map;
-   bool fault, x, w, u, wf, uf, ff, smep;
+   bool fault, x, w, u, wf, uf, ff, smep, smap;
 
smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+   smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
@@ -3617,11 +3618,26 @@ static void update_permission_bitmask(struct kvm_vcpu 
*vcpu,
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if 
cr4.smep */
x &= !(smep && u && !uf);
+
+   /*
+* SMAP:kernel-mode data accesses from user-mode
+* mappings should fault. A fault is considered
+* as a SMAP violation if all of the following
+* conditions are ture:
+*   - X86_CR4_SMAP is set in CR4
+*   - An user page is accessed
+*   - !(CPL<3 && X86_EFLAGS_AC is set)
+*   - Page fault in kernel mode
+*/
+   smap = smap && u && !uf &&
+   !((kvm_x86_ops->get_cpl(vcpu) < 3) &&
+   ((kvm_x86_ops->get_rflags(vcpu) &
+   X86_EFLAGS_AC) == 1));
} else
/* Not really needed: no U/S accesses on ept  */
u = 1;
 
-   fault = (ff && !x) || (uf && !u) || (wf && !w);
+   fault = (ff && !x) || (uf && !u) || (wf && !w) || smap;
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..8820f78 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -73,6 +73,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 
addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+   bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4e33b85..f8293fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -630,6 +630,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
return 1;
 
+   if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+   return 1;
+
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
 
@@ -658,6 +661,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
 
+   if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+   update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+

[PATCH 2/4] KVM: Remove SMAP bit from CR4_RESERVED_BITS.

2014-03-26 Thread Feng Wu
This patch removes SMAP bit from CR4_RESERVED_BITS.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d783..b673925 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | 
X86_CR4_PCIDE \
  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode

2014-03-26 Thread Feng Wu
SMAP is disabled if CPU is in non-paging mode in hardware.
However KVM always uses paging mode to emulate guest non-paging
mode with TDP. To emulate this behavior, SMAP needs to be
manually disabled when guest switches to non-paging mode.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dcc4de3..1d37e50 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3421,13 +3421,15 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned 
long cr4)
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
-* SMEP is disabled if CPU is in non-paging mode in
-* hardware. However KVM always uses paging mode to
+* SMEP/SMAP is disabled if CPU is in non-paging mode
+* in hardware. However KVM always uses paging mode to
 * emulate guest non-paging mode with TDP.
-* To emulate this behavior, SMEP needs to be manually
-* disabled when guest switches to non-paging mode.
+* To emulate this behavior, SMEP/SMAP needs to be
+* manually disabled when guest switches to non-paging
+* mode.
 */
hw_cr4 &= ~X86_CR4_SMEP;
+   hw_cr4 &= ~X86_CR4_SMAP;
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] KVM: enable Intel SMAP for KVM

2014-03-26 Thread Feng Wu
Supervisor Mode Access Prevention (SMAP) is a new security feature 
disclosed by Intel, please refer to the following document: 

http://software.intel.com/sites/default/files/319433-014.pdf
 
Every access to a linear address is either a supervisor-mode access
or a user-mode access. All accesses performed while the current
privilege level (CPL) is less than 3 are supervisor-mode accesses.
If CPL = 3, accesses are generally user-mode accesses. However, some
operations implicitly access system data structures, and the resulting
accesses to those data structures are supervisor-mode accesses regardless
of CPL. Examples of such implicit supervisor accesses include the following:
accesses to the global descriptor table (GDT) or local descriptor table
(LDT) to load a segment descriptor; accesses to the interrupt descriptor
table (IDT) when delivering an interrupt or exception; and accesses to the
task-state segment (TSS) as part of a task switch or change of CPL.

If CR4.SMAP = 1, supervisor-mode data accesses are not allowed to linear
addresses that are accessible in user mode. If CPL < 3, SMAP protections
are disabled if EFLAGS.AC = 1. If CPL = 3, SMAP applies to all supervisor-mode
data accesses (these are implicit supervisor accesses) regardless of the
value of EFLAGS.AC.

This patchset passes the SMAP feature through to guests and lets guests
benefit from it.

Feng Wu (4):
  KVM: expose SMAP feature to guest
  KVM: Remove SMAP bit from CR4_RESERVED_BITS.
  KVM: Add SMAP support when setting CR4
  KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode

 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/cpuid.c|  2 +-
 arch/x86/kvm/cpuid.h|  8 
 arch/x86/kvm/mmu.c  | 22 +++---
 arch/x86/kvm/mmu.h  |  2 ++
 arch/x86/kvm/vmx.c  | 10 ++
 arch/x86/kvm/x86.c  |  6 ++
 7 files changed, 43 insertions(+), 9 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] KVM: expose SMAP feature to guest

2014-03-26 Thread Feng Wu
This patch exposes SMAP feature to guest

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c697625..deb5f9b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -303,7 +303,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-   F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+   F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | F(SMAP);
 
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode

2014-03-28 Thread Feng Wu
SMAP is disabled if CPU is in non-paging mode in hardware.
However KVM always uses paging mode to emulate guest non-paging
mode with TDP. To emulate this behavior, SMAP needs to be
manually disabled when guest switches to non-paging mode.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3927528..e58cb5f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3452,13 +3452,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned 
long cr4)
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
-* SMEP is disabled if CPU is in non-paging mode in
-* hardware. However KVM always uses paging mode to
+* SMEP/SMAP is disabled if CPU is in non-paging mode
+* in hardware. However KVM always uses paging mode to
 * emulate guest non-paging mode with TDP.
-* To emulate this behavior, SMEP needs to be manually
-* disabled when guest switches to non-paging mode.
+* To emulate this behavior, SMEP/SMAP needs to be
+* manually disabled when guest switches to non-paging
+* mode.
 */
-   hw_cr4 &= ~X86_CR4_SMEP;
+   hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] KVM: Remove SMAP bit from CR4_RESERVED_BITS.

2014-03-28 Thread Feng Wu
This patch removes SMAP bit from CR4_RESERVED_BITS.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83af..4eeb049 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | 
X86_CR4_PCIDE \
  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] KVM: enable Intel SMAP for KVM

2014-03-28 Thread Feng Wu
Supervisor Mode Access Prevention (SMAP) is a new security feature 
disclosed by Intel, please refer to the following document: 

http://software.intel.com/sites/default/files/319433-014.pdf
 
Every access to a linear address is either a supervisor-mode access
or a user-mode access. All accesses performed while the current
privilege level (CPL) is less than 3 are supervisor-mode accesses.
If CPL = 3, accesses are generally user-mode accesses. However, some
operations implicitly access system data structures, and the resulting
accesses to those data structures are supervisor-mode accesses regardless
of CPL. Examples of such implicit supervisor accesses include the following:
accesses to the global descriptor table (GDT) or local descriptor table
(LDT) to load a segment descriptor; accesses to the interrupt descriptor
table (IDT) when delivering an interrupt or exception; and accesses to the
task-state segment (TSS) as part of a task switch or change of CPL.

If CR4.SMAP = 1, supervisor-mode data accesses are not allowed to linear
addresses that are accessible in user mode. If CPL < 3, SMAP protections
are disabled if EFLAGS.AC = 1. If CPL = 3, SMAP applies to all supervisor-mode
data accesses (these are implicit supervisor accesses) regardless of the
value of EFLAGS.AC.

This patchset passes the SMAP feature through to guests and lets guests
benefit from it.

Version 1:
  * Remove SMAP bit from CR4_RESERVED_BITS.
  * Add SMAP support when setting CR4
  * Disable SMAP for guests in EPT realmode and EPT unpaging mode
  * Expose SMAP feature to guest

Version 2:
  * Change the logic of updating mmu permission bitmap for SMAP violation
  * Expose SMAP feature to guest in the last patch of this series.

Feng Wu (4):
  KVM: Remove SMAP bit from CR4_RESERVED_BITS.
  KVM: Add SMAP support when setting CR4
  KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode
  KVM: expose SMAP feature to guest

 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/cpuid.c|  2 +-
 arch/x86/kvm/cpuid.h|  8 
 arch/x86/kvm/mmu.c  | 24 +---
 arch/x86/kvm/mmu.h  | 26 +++---
 arch/x86/kvm/paging_tmpl.h  |  2 +-
 arch/x86/kvm/vmx.c  | 11 ++-
 arch/x86/kvm/x86.c  |  9 -
 8 files changed, 69 insertions(+), 15 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] KVM: expose SMAP feature to guest

2014-03-28 Thread Feng Wu
This patch exposes SMAP feature to guest

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c697625..deb5f9b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -303,7 +303,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-   F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+   F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | F(SMAP);
 
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] KVM: Add SMAP support when setting CR4

2014-03-28 Thread Feng Wu
This patch adds SMAP handling logic when setting CR4 for guests

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.h   |  8 
 arch/x86/kvm/mmu.c | 24 +---
 arch/x86/kvm/mmu.h | 26 +++---
 arch/x86/kvm/paging_tmpl.h |  2 +-
 arch/x86/kvm/x86.c |  9 -
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7..eeecbed 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu 
*vcpu)
return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+   struct kvm_cpuid_entry2 *best;
+
+   best = kvm_find_cpuid_entry(vcpu, 7, 0);
+   return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b53135..83b7f8d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,22 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu 
*vcpu,
}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
 {
unsigned bit, byte, pfec;
u8 map;
-   bool fault, x, w, u, wf, uf, ff, smep;
+   bool fault, x, w, u, wf, uf, ff, smapf, smep, smap;
 
smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+   smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+   smapf = pfec & PFERR_RSVD_MASK;
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3627,11 +3629,27 @@ static void update_permission_bitmask(struct kvm_vcpu 
*vcpu,
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if 
cr4.smep */
x &= !(smep && u && !uf);
+
+   /*
+* SMAP:kernel-mode data accesses from user-mode
+* mappings should fault. A fault is considered
+* as a SMAP violation if all of the following
+* conditions are true:
+*   - X86_CR4_SMAP is set in CR4
+*   - A user page is accessed
+*   - Page fault in kernel mode
+*   - !(CPL<3 && X86_EFLAGS_AC is set)
+*
+*   Here, we cover the first three conditions,
+*   we need to check CPL and X86_EFLAGS_AC in
+*   permission_fault() dynamically
+*/
+   smap = smap && smapf && u && !uf;
} else
/* Not really needed: no U/S accesses on ept  */
u = 1;
 
-   fault = (ff && !x) || (uf && !u) || (wf && !w);
+   fault = (ff && !x) || (uf && !u) || (wf && !w) || smap;
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..9d7a0b3 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -73,6 +73,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 
addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+   bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -110,10 +112,28 @@ static inline bool is_write_protection(struct kvm_vcpu 
*vcpu)
  * Will a fault with a given page-fault error code (pfec) cause a permission
  * fault with the given access (in ACC_* format)?
  */
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
-   unsigned pfec)
+static inline bool permission_fault(struct

[PATCH v3 0/4] KVM: enable Intel SMAP for KVM

2014-03-31 Thread Feng Wu
Supervisor Mode Access Prevention (SMAP) is a new security feature 
disclosed by Intel, please refer to the following document: 

http://software.intel.com/sites/default/files/319433-014.pdf
 
Every access to a linear address is either a supervisor-mode access
or a user-mode access. All accesses performed while the current
privilege level (CPL) is less than 3 are supervisor-mode accesses.
If CPL = 3, accesses are generally user-mode accesses. However, some
operations implicitly access system data structures, and the resulting
accesses to those data structures are supervisor-mode accesses regardless
of CPL. Examples of such implicit supervisor accesses include the following:
accesses to the global descriptor table (GDT) or local descriptor table
(LDT) to load a segment descriptor; accesses to the interrupt descriptor
table (IDT) when delivering an interrupt or exception; and accesses to the
task-state segment (TSS) as part of a task switch or change of CPL.

If CR4.SMAP = 1, supervisor-mode data accesses are not allowed to linear
addresses that are accessible in user mode. If CPL < 3, SMAP protections
are disabled if EFLAGS.AC = 1. If CPL = 3, SMAP applies to all supervisor-mode
data accesses (these are implicit supervisor accesses) regardless of the
value of EFLAGS.AC.

This patchset passes the SMAP feature through to guests and lets guests
benefit from it.

Version 1:
  * Remove SMAP bit from CR4_RESERVED_BITS.
  * Add SMAP support when setting CR4
  * Disable SMAP for guests in EPT realmode and EPT unpaging mode
  * Expose SMAP feature to guest

Version 2:
  * Change the logic of updating mmu permission bitmap for SMAP violation
  * Expose SMAP feature to guest in the last patch of this series.

Version 3:
  * Changes in update_permission_bitmask().
  * Use a branchless way suggested by Paolo Bonzini to detect SMAP
violation in permission_fault(). 

Feng Wu (4):
  KVM: Remove SMAP bit from CR4_RESERVED_BITS.
  KVM: Add SMAP support when setting CR4
  KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode
  KVM: expose SMAP feature to guest

 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/cpuid.c|  2 +-
 arch/x86/kvm/cpuid.h|  8 
 arch/x86/kvm/mmu.c  | 35 +---
 arch/x86/kvm/mmu.h  | 44 +
 arch/x86/kvm/paging_tmpl.h  |  2 +-
 arch/x86/kvm/vmx.c  | 11 ++-
 arch/x86/kvm/x86.c  |  9 -
 8 files changed, 93 insertions(+), 20 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/4] KVM: Remove SMAP bit from CR4_RESERVED_BITS.

2014-03-31 Thread Feng Wu
This patch removes SMAP bit from CR4_RESERVED_BITS.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83af..4eeb049 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | 
X86_CR4_PCIDE \
  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 3/4] KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode

2014-03-31 Thread Feng Wu
SMAP is disabled if CPU is in non-paging mode in hardware.
However KVM always uses paging mode to emulate guest non-paging
mode with TDP. To emulate this behavior, SMAP needs to be
manually disabled when guest switches to non-paging mode.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3927528..e58cb5f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3452,13 +3452,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned 
long cr4)
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
-* SMEP is disabled if CPU is in non-paging mode in
-* hardware. However KVM always uses paging mode to
+* SMEP/SMAP is disabled if CPU is in non-paging mode
+* in hardware. However KVM always uses paging mode to
 * emulate guest non-paging mode with TDP.
-* To emulate this behavior, SMEP needs to be manually
-* disabled when guest switches to non-paging mode.
+* To emulate this behavior, SMEP/SMAP needs to be
+* manually disabled when guest switches to non-paging
+* mode.
 */
-   hw_cr4 &= ~X86_CR4_SMEP;
+   hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 4/4] KVM: expose SMAP feature to guest

2014-03-31 Thread Feng Wu
This patch exposes SMAP feature to guest

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c697625..deb5f9b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -303,7 +303,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-   F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+   F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | F(SMAP);
 
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/4] KVM: Add SMAP support when setting CR4

2014-03-31 Thread Feng Wu
This patch adds SMAP handling logic when setting CR4 for guests

Thanks a lot to Paolo Bonzini for his suggestion to use the branchless
way to detect SMAP violation.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.h   |  8 
 arch/x86/kvm/mmu.c | 35 ---
 arch/x86/kvm/mmu.h | 44 
 arch/x86/kvm/paging_tmpl.h |  2 +-
 arch/x86/kvm/x86.c |  9 -
 5 files changed, 85 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7..eeecbed 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu 
*vcpu)
return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+   struct kvm_cpuid_entry2 *best;
+
+   best = kvm_find_cpuid_entry(vcpu, 7, 0);
+   return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b53135..5a1ed38 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,28 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu 
*vcpu,
}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
 {
unsigned bit, byte, pfec;
u8 map;
-   bool fault, x, w, u, wf, uf, ff, smep;
+   bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, smep, smap = 0;
 
smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+   cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+   /*
+* PFERR_RSVD_MASK bit is used to detect SMAP violation.
+* We will check it in permission_fault(), this bit is
+* set in pfec for normal fault, while it is cleared for
+* SMAP violations.
+*/
+   smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3627,11 +3635,32 @@ static void update_permission_bitmask(struct kvm_vcpu 
*vcpu,
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if 
cr4.smep */
x &= !(smep && u && !uf);
+
+   /*
+* SMAP:kernel-mode data accesses from user-mode
+* mappings should fault. A fault is considered
+* as a SMAP violation if all of the following
+* conditions are true:
+*   - X86_CR4_SMAP is set in CR4
+*   - A user page is accessed
+*   - Page fault in kernel mode
+*   - !(CPL<3 && X86_EFLAGS_AC is set)
+*
+*   Here, we cover the first three conditions,
+*   The CPL and X86_EFLAGS_AC is in smapf, which
+*   permission_fault() computes dynamically.
+*
+*   Also, SMAP does not affect instruction
+*   fetches, add the !ff check here to make it
+*   clearer.
+*/
+   smap = cr4_smap && u && !uf && !ff;
} else
/* Not really needed: no U/S accesses on ept  */
u = 1;
 
-   fault = (ff && !x) || (uf && !u) || (wf && !w);
+   fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+   (smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..822190f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESE

[PATCH v4 0/4] KVM: enable Intel SMAP for KVM

2014-03-31 Thread Feng Wu
Supervisor Mode Access Prevention (SMAP) is a new security feature 
disclosed by Intel, please refer to the following document: 

http://software.intel.com/sites/default/files/319433-014.pdf
 
Every access to a linear address is either a supervisor-mode access
or a user-mode access. All accesses performed while the current
privilege level (CPL) is less than 3 are supervisor-mode accesses.
If CPL = 3, accesses are generally user-mode accesses. However, some
operations implicitly access system data structures, and the resulting
accesses to those data structures are supervisor-mode accesses regardless
of CPL. Examples of such implicit supervisor accesses include the following:
accesses to the global descriptor table (GDT) or local descriptor table
(LDT) to load a segment descriptor; accesses to the interrupt descriptor
table (IDT) when delivering an interrupt or exception; and accesses to the
task-state segment (TSS) as part of a task switch or change of CPL.

If CR4.SMAP = 1, supervisor-mode data accesses are not allowed to linear
addresses that are accessible in user mode. If CPL < 3, SMAP protections
are disabled if EFLAGS.AC = 1. If CPL = 3, SMAP applies to all supervisor-mode
data accesses (these are implicit supervisor accesses) regardless of the
value of EFLAGS.AC.

This patchset passes the SMAP feature through to guests and lets guests
benefit from it.

Version 1:
  * Remove SMAP bit from CR4_RESERVED_BITS.
  * Add SMAP support when setting CR4
  * Disable SMAP for guests in EPT realmode and EPT unpaging mode
  * Expose SMAP feature to guest

Version 2:
  * Change the logic of updating mmu permission bitmap for SMAP violation
  * Expose SMAP feature to guest in the last patch of this series.

Version 3:
  * Changes in update_permission_bitmask().
  * Use a branchless way suggested by Paolo Bonzini to detect SMAP
violation in permission_fault(). 

Version 4:
  * Changes to some comments and code style.

Feng Wu (4):
  KVM: Remove SMAP bit from CR4_RESERVED_BITS.
  KVM: Add SMAP support when setting CR4
  KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode
  KVM: expose SMAP feature to guest

 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/cpuid.c|  2 +-
 arch/x86/kvm/cpuid.h|  8 
 arch/x86/kvm/mmu.c  | 34 ---
 arch/x86/kvm/mmu.h  | 44 +
 arch/x86/kvm/paging_tmpl.h  |  2 +-
 arch/x86/kvm/vmx.c  | 11 ++-
 arch/x86/kvm/x86.c  |  9 -
 8 files changed, 92 insertions(+), 20 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 4/4] KVM: expose SMAP feature to guest

2014-03-31 Thread Feng Wu
This patch exposes SMAP feature to guest

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c697625..deb5f9b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -303,7 +303,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-   F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+   F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | F(SMAP);
 
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 3/4] KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode

2014-03-31 Thread Feng Wu
SMAP is disabled if CPU is in non-paging mode in hardware.
However KVM always uses paging mode to emulate guest non-paging
mode with TDP. To emulate this behavior, SMAP needs to be
manually disabled when guest switches to non-paging mode.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3927528..e58cb5f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3452,13 +3452,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned 
long cr4)
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
-* SMEP is disabled if CPU is in non-paging mode in
-* hardware. However KVM always uses paging mode to
+* SMEP/SMAP is disabled if CPU is in non-paging mode
+* in hardware. However KVM always uses paging mode to
 * emulate guest non-paging mode with TDP.
-* To emulate this behavior, SMEP needs to be manually
-* disabled when guest switches to non-paging mode.
+* To emulate this behavior, SMEP/SMAP needs to be
+* manually disabled when guest switches to non-paging
+* mode.
 */
-   hw_cr4 &= ~X86_CR4_SMEP;
+   hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 1/4] KVM: Remove SMAP bit from CR4_RESERVED_BITS.

2014-03-31 Thread Feng Wu
This patch removes SMAP bit from CR4_RESERVED_BITS.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83af..4eeb049 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | 
X86_CR4_PCIDE \
  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 2/4] KVM: Add SMAP support when setting CR4

2014-03-31 Thread Feng Wu
This patch adds SMAP handling logic when setting CR4 for guests

Thanks a lot to Paolo Bonzini for his suggestion to use the branchless
way to detect SMAP violation.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/cpuid.h   |  8 
 arch/x86/kvm/mmu.c | 34 +++---
 arch/x86/kvm/mmu.h | 44 
 arch/x86/kvm/paging_tmpl.h |  2 +-
 arch/x86/kvm/x86.c |  9 -
 5 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7..eeecbed 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu 
*vcpu)
return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+   struct kvm_cpuid_entry2 *best;
+
+   best = kvm_find_cpuid_entry(vcpu, 7, 0);
+   return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b53135..a183783 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu 
*vcpu,
}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
 {
unsigned bit, byte, pfec;
u8 map;
-   bool fault, x, w, u, wf, uf, ff, smep;
+   bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, smep, smap = 0;
 
smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+   cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+   /*
+* PFERR_RSVD_MASK bit is set in PFEC if the access is not
+* subject to SMAP restrictions, and cleared otherwise. The
+* bit is only meaningful if the SMAP bit is set in CR4.
+*/
+   smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3627,11 +3634,32 @@ static void update_permission_bitmask(struct kvm_vcpu 
*vcpu,
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if 
cr4.smep */
x &= !(smep && u && !uf);
+
+   /*
+* SMAP:kernel-mode data accesses from user-mode
+* mappings should fault. A fault is considered
+* as a SMAP violation if all of the following
+* conditions are true:
+*   - X86_CR4_SMAP is set in CR4
+*   - A user page is accessed
+*   - Page fault in kernel mode
+*   - if CPL = 3 or X86_EFLAGS_AC is clear
+*
+*   Here, we cover the first three conditions.
+*   The fourth is computed dynamically in
+*   permission_fault() and is in smapf.
+*
+*   Also, SMAP does not affect instruction
+*   fetches, add the !ff check here to make it
+*   clearer.
+*/
+   smap = cr4_smap && u && !uf && !ff;
} else
/* Not really needed: no U/S accesses on ept  */
u = 1;
 
-   fault = (ff && !x) || (uf && !u) || (wf && !w);
+   fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+   (smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..3842e70 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U

[PATCH] Rename variable smep to cr4_smep

2014-03-31 Thread Feng Wu
This patch is based on the smap patchset

Feng Wu (1):
  Rename variable smep to cr4_smep

 arch/x86/kvm/mmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Rename variable smep to cr4_smep

2014-03-31 Thread Feng Wu
Rename variable smep to cr4_smep, which can better reflect the
meaning of the variable.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/mmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a183783..6000557 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3606,9 +3606,9 @@ void update_permission_bitmask(struct kvm_vcpu *vcpu,
 {
unsigned bit, byte, pfec;
u8 map;
-   bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, smep, smap = 0;
+   bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
 
-   smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+   cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
@@ -3633,7 +3633,7 @@ void update_permission_bitmask(struct kvm_vcpu *vcpu,
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if 
cr4.smep */
-   x &= !(smep && u && !uf);
+   x &= !(cr4_smep && u && !uf);
 
/*
 * SMAP:kernel-mode data accesses from user-mode
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-11-08 Thread Feng Wu
Use vector-hashing to handle lowest-priority interrupts for
posted-interrupts. As an example, modern Intel CPUs use this
method to handle lowest-priority interrupts.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/irq_comm.c | 52 +
 arch/x86/kvm/lapic.c| 57 +
 arch/x86/kvm/lapic.h|  2 ++
 arch/x86/kvm/vmx.c  | 14 --
 5 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9265196..e225106 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);
 
 void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 struct kvm_lapic_irq *irq);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..8156e45 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -266,6 +266,58 @@ out:
return r;
 }
 
+/*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here are the details of the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ *destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+
+{
+   unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+   unsigned int dest_vcpus = 0;
+   struct kvm_vcpu *vcpu;
+   unsigned int i, mod, idx = 0;
+
+   vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
+   if (vcpu)
+   return vcpu;
+
+   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
+   dest_vcpus++;
+   }
+
+   if (dest_vcpus == 0)
+   return NULL;
+
+   mod = irq->vector % dest_vcpus;
+
+   for (i = 0; i <= mod; i++) {
+   idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1;
+   BUG_ON(idx >= KVM_MAX_VCPUS);
+   }
+
+   return kvm_get_vcpu(kvm, idx - 1);
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu)
 {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..4937aa4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -816,6 +816,63 @@ out:
return ret;
 }
 
+struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
+  struct kvm_lapic_irq *irq)
+{
+   struct kvm_apic_map *map;
+   struct kvm_vcpu *vcpu = NULL;
+
+   if (irq->shorthand)
+   return NULL;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+   kvm_lowest_prio_delivery(irq)) {
+   u16 cid;
+   int i, idx = 0;
+   unsigned long bitmap = 1;
+   unsigned int mod, dest_vcpus = 0;
+   struct kvm_lapic **dst = NULL;
+
+
+   if (!kvm_apic_logical_map_valid(map))
+   goto out;
+
+   apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+   if (cid >= ARRAY_SIZE(map->logical_map))
+   goto out;
+
+   dst = map->logical_map[cid];
+
+   for_each_set_bit(i, &bitmap, 16) {
+   if (!dst[i])
+   continue;
+
+   dest_vcpus++;
+   }
+
+   mod = irq->vector % dest_vcpus;
+
+   for (i = 0; i <= mod; i++) {
+   idx = find_next_bit(&bitmap, KVM_MAX_VCPUS, idx) + 1;
+   BUG_ON(idx >= KVM_

[PATCH v2 0/2] Add vector-hashing support for lowest-priority interrupts delivery

2015-12-15 Thread Feng Wu
This series adds vector-hashing support for lowest-priority interrupt
delivery. As an example, modern Intel CPUs on server platforms can use
this method to handle lowest-priority interrupts.

v2:
- Add vector-hashing support for non-vt-d PI case
- Fix some bugs Radim pointed out in v1
- Use a module parameter to control the vector-hashing mechanism

Feng Wu (2):
  KVM: x86: Use vector-hashing to deliver lowest-priority interrupts
  KVM: x86: Add lowest-priority support for vt-d posted-interrupts

 arch/x86/kvm/irq_comm.c |  27 +--
 arch/x86/kvm/lapic.c| 124 
 arch/x86/kvm/lapic.h|   4 ++
 arch/x86/kvm/vmx.c  |  12 -
 arch/x86/kvm/x86.c  |   9 
 arch/x86/kvm/x86.h  |   1 +
 6 files changed, 160 insertions(+), 17 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

2015-12-15 Thread Feng Wu
Use vector-hashing to deliver lowest-priority interrupts for
VT-d posted-interrupts.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/lapic.c | 67 
 arch/x86/kvm/lapic.h |  2 ++
 arch/x86/kvm/vmx.c   | 12 --
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e29001f..d4f2c8f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -854,6 +854,73 @@ out:
 }
 
 /*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here are the details of the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ *destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+{
+   struct kvm_apic_map *map;
+   struct kvm_vcpu *vcpu = NULL;
+
+   if (irq->shorthand)
+   return NULL;
+
+   rcu_read_lock();
+   map = rcu_dereference(kvm->arch.apic_map);
+
+   if (!map)
+   goto out;
+
+   if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+   kvm_lowest_prio_delivery(irq)) {
+   u16 cid;
+   int i, idx = 0;
+   unsigned long bitmap = 1;
+   unsigned int dest_vcpus = 0;
+   struct kvm_lapic **dst = NULL;
+
+
+   if (!kvm_apic_logical_map_valid(map))
+   goto out;
+
+   apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+   if (cid >= ARRAY_SIZE(map->logical_map))
+   goto out;
+
+   dst = map->logical_map[cid];
+
+   for_each_set_bit(i, &bitmap, 16) {
+   if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
+   clear_bit(i, &bitmap);
+   continue;
+   }
+   }
+
+   dest_vcpus = hweight16(bitmap);
+
+   if (dest_vcpus != 0) {
+   idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+&bitmap, 16);
+   vcpu = dst[idx-1]->vcpu;
+   }
+   }
+
+out:
+   rcu_read_unlock();
+   return vcpu;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
+/*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
  */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6890ef0..52bffce 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct 
kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
 int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
   const unsigned long *bitmap, u32 bitmap_size);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5eb56ed..3f89189 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, 
unsigned int host_irq,
 */
 
kvm_set_msi_irq(e, &irq);
-   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
-   continue;
+
+   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+   if (!kvm_vector_hashing_enabled() ||
+   irq.delivery_mode != APIC_DM_LOWEST)
+   continue;
+
+   vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
+   if (!vcpu)
+   continue;
+   }
 
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/2] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

2015-12-15 Thread Feng Wu
Use vector-hashing to deliver lowest-priority interrupts. As an
example, modern Intel CPUs on server platforms use this method to
handle lowest-priority interrupts.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/irq_comm.c | 27 ++-
 arch/x86/kvm/lapic.c| 57 -
 arch/x86/kvm/lapic.h|  2 ++
 arch/x86/kvm/x86.c  |  9 
 arch/x86/kvm/x86.h  |  1 +
 5 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..c8c5f61 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -32,6 +32,7 @@
 #include "ioapic.h"
 
 #include "lapic.h"
+#include "x86.h"
 
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
   struct kvm *kvm, int irq_source_id, int level,
@@ -53,8 +54,10 @@ static int kvm_set_ioapic_irq(struct 
kvm_kernel_irq_routing_entry *e,
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map)
 {
-   int i, r = -1;
+   int i, r = -1, idx = 0;
struct kvm_vcpu *vcpu, *lowest = NULL;
+   unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+   unsigned int dest_vcpus = 0;
 
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_lowest_prio_delivery(irq)) {
@@ -65,6 +68,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
return r;
 
+   memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
continue;
@@ -78,13 +83,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
} else if (kvm_lapic_enabled(vcpu)) {
-   if (!lowest)
-   lowest = vcpu;
-   else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
-   lowest = vcpu;
+   if (!kvm_vector_hashing_enabled()) {
+   if (!lowest)
+   lowest = vcpu;
+   else if (kvm_apic_compare_prio(vcpu, lowest) < 
0)
+   lowest = vcpu;
+   } else {
+   __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
+   dest_vcpus++;
+   }
}
}
 
+   if (dest_vcpus != 0) {
+   idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+   lowest = kvm_get_vcpu(kvm, idx - 1);
+   }
+
if (lowest)
r = kvm_apic_set_irq(lowest, irq, dest_map);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..e29001f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -678,6 +678,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct 
kvm_lapic *source,
}
 }
 
+int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
+  const unsigned long *bitmap, u32 bitmap_size)
+{
+   u32 mod;
+   int i, idx = 0;
+
+   mod = vector % dest_vcpus;
+
+   for (i = 0; i <= mod; i++) {
+   idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
+   BUG_ON(idx > bitmap_size);
+   }
+
+   return idx;
+}
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
@@ -731,17 +747,38 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, 
struct kvm_lapic *src,
dst = map->logical_map[cid];
 
if (kvm_lowest_prio_delivery(irq)) {
-   int l = -1;
-   for_each_set_bit(i, &bitmap, 16) {
-   if (!dst[i])
-   continue;
-   if (l < 0)
-   l = i;
-   else if (kvm_apic_compare_prio(dst[i]->vcpu, 
dst[l]->vcpu) < 0)
-   l = i;
+   if (!kvm_vector_hashing_enabled()) {
+   int l = -1;
+   for_each_set_bit(i, &bitmap, 16) {
+   if (!dst[i])
+   continue;
+   if (l < 0)
+   l = i;
+  

[v4 06/16] KVM: Make struct kvm_irq_routing_table accessible

2015-06-11 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_host.h | 15 +++
 virt/kvm/irqchip.c   | 11 ---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad45054..f591f7c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -321,6 +321,21 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   struct kvm_kernel_irq_routing_entry *rt_entries;
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 1d56a90..bac3b52 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,17 +31,6 @@
 #include 
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   struct kvm_kernel_irq_routing_entry *rt_entries;
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 08/16] KVM: kvm-vfio: User API for IRQ forwarding

2015-06-11 Thread Feng Wu
From: Eric Auger 

This patch adds and documents a new KVM_DEV_VFIO_DEVICE group
and 2 device attributes: KVM_DEV_VFIO_DEVICE_FORWARD_IRQ,
KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. The purpose is to be able
to set a VFIO device IRQ as forwarded or not forwarded.
The command takes as its argument a handle to a new struct named
kvm_vfio_dev_irq.

Signed-off-by: Eric Auger 
---
 Documentation/virtual/kvm/devices/vfio.txt | 34 --
 include/uapi/linux/kvm.h   | 12 +++
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/vfio.txt 
b/Documentation/virtual/kvm/devices/vfio.txt
index ef51740..6186e6d 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -4,15 +4,20 @@ VFIO virtual device
 Device types supported:
   KVM_DEV_TYPE_VFIO
 
-Only one VFIO instance may be created per VM.  The created device
-tracks VFIO groups in use by the VM and features of those groups
-important to the correctness and acceleration of the VM.  As groups
-are enabled and disabled for use by the VM, KVM should be updated
-about their presence.  When registered with KVM, a reference to the
-VFIO-group is held by KVM.
+Only one VFIO instance may be created per VM.
+
+The created device tracks VFIO groups in use by the VM and features
+of those groups important to the correctness and acceleration of
+the VM.  As groups are enabled and disabled for use by the VM, KVM
+should be updated about their presence.  When registered with KVM,
+a reference to the VFIO-group is held by KVM.
+
+The device also allows control of some IRQ settings of VFIO devices:
+forwarding/posting.
 
 Groups:
   KVM_DEV_VFIO_GROUP
+  KVM_DEV_VFIO_DEVICE
 
 KVM_DEV_VFIO_GROUP attributes:
   KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
@@ -20,3 +25,20 @@ KVM_DEV_VFIO_GROUP attributes:
 
 For each, kvm_device_attr.addr points to an int32_t file descriptor
 for the VFIO group.
+
+KVM_DEV_VFIO_DEVICE attributes:
+  KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: set a VFIO device IRQ as forwarded
+  KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: set a VFIO device IRQ as not forwarded
+
+For each, kvm_device_attr.addr points to a kvm_vfio_dev_irq struct.
+
+When forwarded, a physical IRQ is completed by the guest and not by the
+host. This requires HW support in the interrupt controller.
+
+Forwarding can only be set when the corresponding VFIO IRQ is neither masked
+(whether through the VFIO_DEVICE_SET_IRQS command or as a consequence of this
+IRQ being currently handled) nor active at the interrupt controller level;
+otherwise -EAGAIN is returned. It is advised to set the forwarding before
+the VFIO signaling is set up, as this avoids trial and error.
+
+Unforwarding can happen at any time.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4b60056..798f3e4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -999,6 +999,9 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP1
 #define   KVM_DEV_VFIO_GROUP_ADD   1
 #define   KVM_DEV_VFIO_GROUP_DEL   2
+#define  KVM_DEV_VFIO_DEVICE   2
+#define   KVM_DEV_VFIO_DEVICE_FORWARD_IRQ  1
+#define   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ2
 
 enum kvm_device_type {
KVM_DEV_TYPE_FSL_MPIC_20= 1,
@@ -1018,6 +1021,15 @@ enum kvm_device_type {
KVM_DEV_TYPE_MAX,
 };
 
+struct kvm_vfio_dev_irq {
+   __u32   argsz;  /* structure length */
+   __u32   fd; /* file descriptor of the VFIO device */
+   __u32   index;  /* VFIO device IRQ index */
+   __u32   start;  /* start of subindex range */
+   __u32   count;  /* size of subindex range */
+   __u32   gsi[];  /* gsi, ie. virtual IRQ number */
+};
+
 /*
  * ioctls for VM fds
  */
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 00/16] Add VT-d Posted-Interrupts support

2015-06-11 Thread Feng Wu
VT-d Posted-Interrupts is an enhancement to CPU side Posted-Interrupt.
With VT-d Posted-Interrupts enabled, external interrupts from
direct-assigned devices can be delivered to guests without VMM
intervention when guest is running in non-root mode.

You can find the VT-d Posted-Interrupts Spec. at the following URL:
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

This series was part of http://thread.gmane.org/gmane.linux.kernel.iommu/7708. 
To make things clear, send out IOMMU part here.

This patch-set is based on the latest x86/apic branch of the tip tree.

The whole series, which contains multiple components, is divided into three parts:
- Prerequisite changes to irq subsystem (already merged in tip/x86/apic)
- IOMMU part (about to be merged in tip/x86/apic), here is the
  latest version: https://lkml.org/lkml/2015/6/9/22
- KVM and VFIO parts (this series)

Patches 8, 9, and 10 are from Eric Auger; they define some common VFIO
APIs. I have integrated them into this series and use those APIs in
my patches.

v4:
* For lowest-priority interrupt, only support single-CPU destination
interrupts at the current stage, more common lowest priority support
will be added later.
* According to Marcelo's suggestion, when a vCPU is blocked, we handle
the posted-interrupts in the HLT emulation path.
* Some small changes (coding style, typo, add some code comments)

Eric Auger (3):
  KVM: kvm-vfio: User API for IRQ forwarding
  VFIO: external user API for interaction
  KVM: kvm-vfio: wrappers to VFIO external API device helpers

Feng Wu (13):
  KVM: Extend struct pi_desc for VT-d Posted-Interrupts
  KVM: Add some helper functions for Posted-Interrupts
  KVM: Define a new interface kvm_intr_is_single_vcpu()
  KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu
  KVM: Add interfaces to control PI outside vmx
  KVM: Make struct kvm_irq_routing_table accessible
  KVM: make kvm_set_msi_irq() public
  KVM: kvm-vfio: User API for VT-d Posted-Interrupts
  KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts
  KVM: x86: kvm-vfio: VT-d posted-interrupts setup
  KVM: Update Posted-Interrupts Descriptor when vCPU is preempted
  KVM: Update Posted-Interrupts Descriptor when vCPU is blocked
  KVM: Warn if 'SN' is set during posting interrupts by software

 Documentation/virtual/kvm/devices/vfio.txt |  43 -
 arch/x86/include/asm/kvm_host.h|  16 ++
 arch/x86/kvm/Makefile  |   3 +-
 arch/x86/kvm/irq_comm.c|  28 ++-
 arch/x86/kvm/kvm_vfio_x86.c|  85 +
 arch/x86/kvm/vmx.c | 278 -
 arch/x86/kvm/x86.c |  42 +++--
 drivers/vfio/vfio.c|  24 +++
 include/linux/kvm_host.h   |  40 +
 include/linux/vfio.h   |   3 +
 include/uapi/linux/kvm.h   |  14 ++
 virt/kvm/irqchip.c |  11 --
 virt/kvm/kvm_main.c|   3 +
 virt/kvm/vfio.c| 200 +
 14 files changed, 758 insertions(+), 32 deletions(-)
 create mode 100644 arch/x86/kvm/kvm_vfio_x86.c

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 01/16] KVM: Extend struct pi_desc for VT-d Posted-Interrupts

2015-06-11 Thread Feng Wu
Extend struct pi_desc for VT-d Posted-Interrupts.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b6168..bd26501 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -446,8 +446,24 @@ struct nested_vmx {
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
-   u32 control;/* bit 0 of control is outstanding notification bit */
-   u32 rsvd[7];
+   union {
+   struct {
+   /* bit 256 - Outstanding Notification */
+   u64 on  : 1,
+   /* bit 257 - Suppress Notification */
+   sn  : 1,
+   /* bit 271:258 - Reserved */
+   rsvd_1  : 14,
+   /* bit 279:272 - Notification Vector */
+   nv  : 8,
+   /* bit 287:280 - Reserved */
+   rsvd_2  : 8,
+   /* bit 319:288 - Notification Destination */
+   ndst: 32;
+   };
+   u64 control;
+   };
+   u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 03/16] KVM: Define a new interface kvm_intr_is_single_vcpu()

2015-06-11 Thread Feng Wu
This patch defines a new interface kvm_intr_is_single_vcpu(),
which returns whether the interrupt targets a single CPU or not.

It is used by VT-d PI, since for now we only support single-CPU
interrupts. For lowest-priority interrupts, if the user configures
them via /proc/irq or uses irqbalance to make them single-CPU, we
can use PI to deliver the interrupts. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/irq_comm.c | 24 
 2 files changed, 26 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e..cab4141 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1179,4 +1179,6 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, 
u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 72298b3..9e42645 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -299,6 +299,30 @@ out:
return r;
 }
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+struct kvm_vcpu **dest_vcpu)
+{
+   int i, r = 0;
+   struct kvm_vcpu *vcpu;
+
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   if (!kvm_apic_present(vcpu))
+   continue;
+
+   if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+   irq->dest_id, irq->dest_mode))
+   continue;
+
+   r++;
+   *dest_vcpu = vcpu;
+   }
+
+   if (r == 1)
+   return true;
+   else
+   return false;
+}
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 05/16] KVM: Add interfaces to control PI outside vmx

2015-06-11 Thread Feng Wu
This patch adds pi_clear_sn and pi_set_sn to struct kvm_x86_ops,
so we can set/clear SN outside vmx.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/vmx.c  | 13 +
 2 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c7a09fb..69bc770 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -835,6 +835,9 @@ struct kvm_x86_ops {
   gfn_t offset, unsigned long mask);
 
u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
+
+   void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
+   void (*pi_set_sn)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ccc9ce4..dd3c63a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -615,6 +615,16 @@ struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
 }
 
+static void vmx_pi_clear_sn(struct kvm_vcpu *vcpu)
+{
+   pi_clear_sn(vcpu_to_pi_desc(vcpu));
+}
+
+static void vmx_pi_set_sn(struct kvm_vcpu *vcpu)
+{
+   pi_set_sn(vcpu_to_pi_desc(vcpu));
+}
+
 static unsigned long shadow_read_only_fields[] = {
/*
 * We do NOT shadow fields that are modified when L0
@@ -10307,6 +10317,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 
.get_pi_desc_addr = vmx_get_pi_desc_addr,
+
+   .pi_clear_sn = vmx_pi_clear_sn,
+   .pi_set_sn = vmx_pi_set_sn,
 };
 
 static int __init vmx_init(void)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 02/16] KVM: Add some helper functions for Posted-Interrupts

2015-06-11 Thread Feng Wu
This patch adds some helper functions to manipulate the
Posted-Interrupts Descriptor.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bd26501..8be6aa4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -443,6 +443,8 @@ struct nested_vmx {
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
@@ -483,6 +485,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc 
*pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static void pi_clear_sn(struct pi_desc *pi_desc)
+{
+   return clear_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static void pi_set_sn(struct pi_desc *pi_desc)
+{
+   return set_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_on(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_ON,
+   (unsigned long *)&pi_desc->control);
+}
+
+static int pi_test_sn(struct pi_desc *pi_desc)
+{
+   return test_bit(POSTED_INTR_SN,
+   (unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 15/16] KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

2015-06-11 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/kvm/vmx.c  | 158 
 arch/x86/kvm/x86.c  |  42 ---
 include/linux/kvm_host.h|   3 +
 virt/kvm/kvm_main.c |   3 +
 5 files changed, 199 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1605bf8..2ef6f2a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -842,6 +842,9 @@ struct kvm_x86_ops {
 
void (*pi_clear_sn)(struct kvm_vcpu *vcpu);
void (*pi_set_sn)(struct kvm_vcpu *vcpu);
+
+   int (*pi_pre_block)(struct kvm_vcpu *vcpu);
+   void (*pi_post_block)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9d9b403..7e8a800 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -888,6 +888,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -2972,6 +2979,8 @@ static int hardware_enable(void)
return -EBUSY;
 
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+   INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+   spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
/*
 * Now we can enable the vmclear operation in kdump
@@ -6099,6 +6108,25 @@ static void update_ple_window_actual_max(void)
ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTR_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+   struct kvm_vcpu *vcpu;
+   int cpu = smp_processor_id();
+
+   spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+   list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+   blocked_vcpu_list) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (pi_test_on(pi_desc) == 1)
+   kvm_vcpu_kick(vcpu);
+   }
+   spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
int r = -ENOMEM, i, msr;
@@ -6283,6 +6311,8 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
 
+   kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
return alloc_kvm_area();
 
 out8:
@@ -10236,6 +10266,131 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm 
*kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is enabled.
+ * - Store the vCPU to the wakeup list, so when interrupts happen
+ *   we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ *  'NDST' <-- vcpu->pre_pcpu
+ *  'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, which means at least one
+ *   interrupt is posted for this vCPU, we cannot block it, in
+ *   this case, return 1, otherwise, return 0.
+ *
+ */
+static int vmx_pi_pre_block(struct kvm_vcpu *vcpu)
+{
+   unsigned long flags;
+   unsigned int dest;
+   struct pi_desc old, new;
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   vcpu->pre_pcpu = vcpu->cpu;
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+ vcpu->pre_pcpu), flags);
+   list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+   spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+  vcpu->pre_pcpu), flags);
+
+   do {
+   old.control = new.control = pi_desc->control;
+
+   /*
+* We should not block the vCPU if
+* an interrupt is posted for it.
+*/
+   if (pi_test_on(pi_desc) == 1) {
+   spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+  

[v4 10/16] KVM: kvm-vfio: wrappers to VFIO external API device helpers

2015-06-11 Thread Feng Wu
From: Eric Auger 

Provide wrapper functions that allow KVM-VFIO device code to
interact with a vfio device:
- kvm_vfio_device_get_external_user gets a handle to a struct
  vfio_device from the vfio device file descriptor and increments
  its reference counter,
- kvm_vfio_device_put_external_user decrements the reference counter
  to a vfio device,
- kvm_vfio_external_base_device returns a handle to the struct device
  of the vfio device.

Also kvm_vfio_get_vfio_device and kvm_vfio_put_vfio_device helpers
are introduced.

Signed-off-by: Eric Auger 
---
 virt/kvm/vfio.c | 74 +
 1 file changed, 74 insertions(+)

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 620e37f..80a45e4 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -60,6 +60,80 @@ static void kvm_vfio_group_put_external_user(struct 
vfio_group *vfio_group)
symbol_put(vfio_group_put_external_user);
 }
 
+static struct vfio_device *kvm_vfio_device_get_external_user(struct file 
*filep)
+{
+   struct vfio_device *vdev;
+   struct vfio_device *(*fn)(struct file *);
+
+   fn = symbol_get(vfio_device_get_external_user);
+   if (!fn)
+   return ERR_PTR(-EINVAL);
+
+   vdev = fn(filep);
+
+   symbol_put(vfio_device_get_external_user);
+
+   return vdev;
+}
+
+static void kvm_vfio_device_put_external_user(struct vfio_device *vdev)
+{
+   void (*fn)(struct vfio_device *);
+
+   fn = symbol_get(vfio_device_put_external_user);
+   if (!fn)
+   return;
+
+   fn(vdev);
+
+   symbol_put(vfio_device_put_external_user);
+}
+
+static struct device *kvm_vfio_external_base_device(struct vfio_device *vdev)
+{
+   struct device *(*fn)(struct vfio_device *);
+   struct device *dev;
+
+   fn = symbol_get(vfio_external_base_device);
+   if (!fn)
+   return NULL;
+
+   dev = fn(vdev);
+
+   symbol_put(vfio_external_base_device);
+
+   return dev;
+}
+
+/**
+ * kvm_vfio_get_vfio_device - Returns a handle to a vfio-device
+ *
+ * Checks it is a valid vfio device and increments its reference counter
+ * @fd: file descriptor of the vfio platform device
+ */
+static struct vfio_device *kvm_vfio_get_vfio_device(int fd)
+{
+   struct fd f = fdget(fd);
+   struct vfio_device *vdev;
+
+   if (!f.file)
+   return ERR_PTR(-EINVAL);
+   vdev = kvm_vfio_device_get_external_user(f.file);
+   fdput(f);
+   return vdev;
+}
+
+/**
+ * kvm_vfio_put_vfio_device: decrements the reference counter of the
+ * vfio platform device
+ *
+ * @vdev: vfio_device handle to release
+ */
+static void kvm_vfio_put_vfio_device(struct vfio_device *vdev)
+{
+   kvm_vfio_device_put_external_user(vdev);
+}
+
 static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
 {
long (*fn)(struct vfio_group *, unsigned long);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 16/16] KVM: Warn if 'SN' is set during posting interrupts by software

2015-06-11 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent interrupts, so we cannot post
interrupts when 'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and being scheduled out is the only case in which SN is currently
set, so warn if SN is set.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7e8a800..f1daa8b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4485,6 +4485,22 @@ static inline bool 
kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 14/16] KVM: Update Posted-Interrupts Descriptor when vCPU is preempted

2015-06-11 Thread Feng Wu
This patch updates the Posted-Interrupts Descriptor when vCPU
is preempted.

sched out:
- Set 'SN' to suppress future non-urgent interrupts posted for
the vCPU.

sched in:
- Clear 'SN'
- Change NDST if vCPU is scheduled to a different CPU
- Set 'NV' to POSTED_INTR_VECTOR

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dd3c63a..9d9b403 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -46,6 +46,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 
@@ -2001,10 +2002,43 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
vmx->loaded_vmcs->cpu = cpu;
}
+
+   if (irq_remapping_cap(IRQ_POSTING_CAP)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+   struct pi_desc old, new;
+   unsigned int dest;
+
+   do {
+   old.control = new.control = pi_desc->control;
+   if (vcpu->cpu != cpu) {
+   dest = cpu_physical_id(cpu);
+
+   if (x2apic_enabled())
+   new.ndst = dest;
+   else
+   new.ndst = (dest << 8) & 0xFF00;
+   }
+
+   /* Allow posting non-urgent interrupts */
+   new.sn = 0;
+
+   /* set 'NV' to 'notification vector' */
+   new.nv = POSTED_INTR_VECTOR;
+   } while (cmpxchg(&pi_desc->control, old.control,
+   new.control) != old.control);
+   }
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   if (irq_remapping_cap(IRQ_POSTING_CAP)) {
+   struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+   /* Set SN when the vCPU is preempted */
+   if (vcpu->preempted)
+   pi_set_sn(pi_desc);
+   }
+
__vmx_load_host_state(to_vmx(vcpu));
if (!vmm_exclusive) {
__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 12/16] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2015-06-11 Thread Feng Wu
This patch adds the kvm-vfio interface for VT-d Posted-Interrupts.
When a guest updates the MSI/MSI-X information for an assigned device,
QEMU will use the KVM_DEV_VFIO_DEVICE_POST_IRQ attribute to set up
the IRTE for VT-d PI. A userspace program can also use
KVM_DEV_VFIO_DEVICE_UNPOST_IRQ to change back to irq remapping mode.
This patch implements these IRQ attributes.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_host.h |  22 +
 virt/kvm/vfio.c  | 126 +++
 2 files changed, 148 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f591f7c..69f8711 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1073,6 +1073,28 @@ extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POST
+/*
+ * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+uint32_t guest_irq, bool set);
+#else
+static inline int kvm_arch_vfio_update_pi_irte(struct kvm *kvm,
+  unsigned int host_irq,
+  uint32_t guest_irq,
+  bool set)
+{
+   return 0;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 80a45e4..547fc51 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "vfio.h"
 
 struct kvm_vfio_group {
@@ -276,12 +277,128 @@ static int kvm_vfio_set_group(struct kvm_device *dev, 
long attr, u64 arg)
return -ENXIO;
 }
 
+static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
+{
+   if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
+   u8 pin;
+
+   pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+   if (pin)
+   return 1;
+   } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
+   return pci_msi_vec_count(pdev);
+   } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
+   return pci_msix_vec_count(pdev);
+   }
+
+   return 0;
+}
+
+static int kvm_vfio_control_pi(struct kvm_device *kdev,
+  int32_t __user *argp, bool set)
+{
+   struct kvm_vfio_dev_irq pi_info;
+   uint32_t *gsi;
+   unsigned long minsz;
+   struct vfio_device *vdev;
+   struct msi_desc *entry;
+   struct device *dev;
+   struct pci_dev *pdev;
+   int i, max, ret;
+
+   minsz = offsetofend(struct kvm_vfio_dev_irq, count);
+
+   if (copy_from_user(&pi_info, (void __user *)argp, minsz))
+   return -EFAULT;
+
+   if (pi_info.argsz < minsz || pi_info.index >= VFIO_PCI_NUM_IRQS)
+   return -EINVAL;
+
+   vdev = kvm_vfio_get_vfio_device(pi_info.fd);
+   if (IS_ERR(vdev))
+   return PTR_ERR(vdev);
+
+   dev = kvm_vfio_external_base_device(vdev);
+   if (!dev || !dev_is_pci(dev)) {
+   ret = -EFAULT;
+   goto put_vfio_device;
+   }
+
+   pdev = to_pci_dev(dev);
+
+   max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
+   if (max <= 0) {
+   ret = -EFAULT;
+   goto put_vfio_device;
+   }
+
+   if (pi_info.argsz - minsz < pi_info.count * sizeof(u32) ||
+   pi_info.start >= max || pi_info.start + pi_info.count > max) {
+   ret = -EINVAL;
+   goto put_vfio_device;
+   }
+
+   gsi = memdup_user((void __user *)((unsigned long)argp + minsz),
+  pi_info.count * sizeof(u32));
+   if (IS_ERR(gsi)) {
+   ret = PTR_ERR(gsi);
+   goto put_vfio_device;
+   }
+
+#ifdef CONFIG_PCI_MSI
+   for (i = 0; i < pi_info.count; i++) {
+   list_for_each_entry(entry, &pdev->msi_list, list) {
+   if (entry->msi_attrib.entry_nr != pi_info.start+i)
+   continue;
+
+   ret = kvm_arch_vfio_update_pi_irte(kdev->kvm,
+  entry->irq,
+  gsi[i],
+  set);
+   if (ret)
+   goto free_gsi;
+   }
+   }
+#endif
+
+   ret = 0;
+
+free_gsi:
+   kfree(gsi);
+
+put_vfio_device:
+   kvm_vfio_p

[v4 07/16] KVM: make kvm_set_msi_irq() public

2015-06-11 Thread Feng Wu
Make kvm_set_msi_irq() public so that it can be used outside irq_comm.c.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h | 4 
 arch/x86/kvm/irq_comm.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69bc770..31a495f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -1186,4 +1188,6 @@ void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 struct kvm_vcpu **dest_vcpu);
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq);
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9e42645..58d7d49 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -94,8 +94,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+struct kvm_lapic_irq *irq)
 {
trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 04/16] KVM: Get Posted-Interrupts descriptor address from struct kvm_vcpu

2015-06-11 Thread Feng Wu
Define an interface to get PI descriptor address from the vCPU structure.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c  | 11 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cab4141..c7a09fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -833,6 +833,8 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
   struct kvm_memory_slot *slot,
   gfn_t offset, unsigned long mask);
+
+   u64 (*get_pi_desc_addr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8be6aa4..ccc9ce4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -610,6 +610,10 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu 
*vcpu)
 #define FIELD64(number, name)  [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
 
+struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+   return &(to_vmx(vcpu)->pi_desc);
+}
 
 static unsigned long shadow_read_only_fields[] = {
/*
@@ -4495,6 +4499,11 @@ static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu 
*vcpu)
return;
 }
 
+static u64 vmx_get_pi_desc_addr(struct kvm_vcpu *vcpu)
+{
+   return __pa((u64)vcpu_to_pi_desc(vcpu));
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -10296,6 +10305,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+
+   .get_pi_desc_addr = vmx_get_pi_desc_addr,
 };
 
 static int __init vmx_init(void)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 13/16] KVM: x86: kvm-vfio: VT-d posted-interrupts setup

2015-06-11 Thread Feng Wu
This patch defines the macro __KVM_HAVE_ARCH_KVM_VFIO_POST and
implements kvm_arch_vfio_update_pi_irte for the x86 architecture.

Signed-off-by: Feng Wu 
---
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/Makefile   |  3 +-
 arch/x86/kvm/kvm_vfio_x86.c | 85 +
 3 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/kvm_vfio_x86.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 31a495f..1605bf8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,6 +81,8 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, 
int level)
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
+#define __KVM_HAVE_ARCH_KVM_VFIO_POST
+
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 16e8f96..6bafc89 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,8 @@ kvm-y += $(KVM)/kvm_main.o 
$(KVM)/coalesced_mmio.o \
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
 
 kvm-y  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-  i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+  i8254.o ioapic.o irq_comm.o cpuid.o pmu.o \
+  kvm_vfio_x86.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)+= assigned-dev.o iommu.o
 kvm-intel-y+= vmx.o
 kvm-amd-y  += svm.o
diff --git a/arch/x86/kvm/kvm_vfio_x86.c b/arch/x86/kvm/kvm_vfio_x86.c
new file mode 100644
index 000..a2d74f9
--- /dev/null
+++ b/arch/x86/kvm/kvm_vfio_x86.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2014 Intel Corporation.
+ * Authors: Feng Wu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+
+/*
+ * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via /proc/irq or uses
+* irqbalance to make the interrupts single-CPU.
+*
+* We will support full lowest-priority interrupt later.
+*
+*/
+
+   kvm_set_msi_irq(e, &irq);
+   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (set)
+   ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+   else {
+   /* suppress notification event before unposting */
+   kvm_x86_ops->pi_set_sn(vcpu);
+   ret = irq_set_vcpu_affinity(host_irq, NULL);
+   kvm_x86_ops->pi_clear_sn(vcpu);
+   }
+
+   if (ret < 0) {
+   printk(KERN_INFO "%s: failed to update PI IRTE\n",
+   __func__);
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+   return ret;
+}
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubsc

[v4 11/16] KVM: kvm-vfio: User API for VT-d Posted-Interrupts

2015-06-11 Thread Feng Wu
This patch adds and documents two new attributes
KVM_DEV_VFIO_DEVICE_POST_IRQ and KVM_DEV_VFIO_DEVICE_UNPOST_IRQ
in KVM_DEV_VFIO_DEVICE group. The new attributes are used for
VT-d Posted-Interrupts.

When the guest OS changes the interrupt configuration for an
assigned device, such as the MSI/MSIx data/address fields,
QEMU will use this IRQ attribute to tell KVM to update the
related IRTE according to the VT-d Posted-Interrupts Specification;
for example, the guest vector should be updated in the related IRTE.

Signed-off-by: Feng Wu 
---
 Documentation/virtual/kvm/devices/vfio.txt | 9 +
 include/uapi/linux/kvm.h   | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/vfio.txt 
b/Documentation/virtual/kvm/devices/vfio.txt
index 6186e6d..34925e1 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -42,3 +42,12 @@ In such a situation, -EAGAIN is returned. It is advised to 
to set the
 forwarding before the VFIO signaling is set up, this avoids trial and errors.
 
 Unforwarding can happen at any time.
+
+  KVM_DEV_VFIO_DEVICE_POST_IRQ: set a VFIO device IRQ as posted
+  KVM_DEV_VFIO_DEVICE_UNPOST_IRQ: set a VFIO device IRQ as remapped
+For this attribute, kvm_device_attr.addr points to a kvm_vfio_dev_irq struct.
+
+When guest OS changes the interrupt configuration for an assigned device,
+such as, MSI/MSIx data/address fields, QEMU will use this IRQ attribute
+to tell KVM to update the related IRTE according to the VT-d Posted-Interrupts
+Specification, such as, the guest vector should be updated in the related IRTE.
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 798f3e4..6e17596 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1002,6 +1002,8 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_DEVICE   2
 #define   KVM_DEV_VFIO_DEVICE_FORWARD_IRQ  1
 #define   KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ2
+#define   KVM_DEV_VFIO_DEVICE_POST_IRQ 3
+#define   KVM_DEV_VFIO_DEVICE_UNPOST_IRQ   4
 
 enum kvm_device_type {
KVM_DEV_TYPE_FSL_MPIC_20= 1,
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v4 09/16] VFIO: external user API for interaction

2015-06-11 Thread Feng Wu
From: Eric Auger 

The VFIO external user API is enriched with 3 new functions that
allow a kernel user external to VFIO to retrieve some information
from a VFIO device.

- vfio_device_get_external_user gets a vfio device from
  its fd and increments its reference counter
- vfio_device_put_external_user decrements the reference counter
- vfio_external_base_device returns a handle to the struct device

Signed-off-by: Eric Auger 
---
 drivers/vfio/vfio.c  | 24 
 include/linux/vfio.h |  3 +++
 2 files changed, 27 insertions(+)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index e1278fe..c10b3cb 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1504,6 +1504,30 @@ void vfio_group_put_external_user(struct vfio_group 
*group)
 }
 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
 
+struct vfio_device *vfio_device_get_external_user(struct file *filep)
+{
+   struct vfio_device *vdev = filep->private_data;
+
+   if (filep->f_op != &vfio_device_fops)
+   return ERR_PTR(-EINVAL);
+
+   vfio_device_get(vdev);
+   return vdev;
+}
+EXPORT_SYMBOL_GPL(vfio_device_get_external_user);
+
+void vfio_device_put_external_user(struct vfio_device *vdev)
+{
+   vfio_device_put(vdev);
+}
+EXPORT_SYMBOL_GPL(vfio_device_put_external_user);
+
+struct device *vfio_external_base_device(struct vfio_device *vdev)
+{
+   return vdev->dev;
+}
+EXPORT_SYMBOL_GPL(vfio_external_base_device);
+
 int vfio_external_user_iommu_id(struct vfio_group *group)
 {
return iommu_group_id(group->iommu_group);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ddb4409..e120e9a 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -88,6 +88,9 @@ extern void vfio_group_put_external_user(struct vfio_group 
*group);
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
  unsigned long arg);
+extern struct vfio_device *vfio_device_get_external_user(struct file *filep);
+extern void vfio_device_put_external_user(struct vfio_device *vdev);
+extern struct device *vfio_external_base_device(struct vfio_device *vdev);
 
 struct pci_dev;
 #ifdef CONFIG_EEH
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v1 1/5] vfio: Register/unregister irq_bypass_producer

2015-07-09 Thread Feng Wu
This patch adds the registration/unregistration of an
irq_bypass_producer for MSI/MSIx on vfio pci devices.

Signed-off-by: Feng Wu 
---
 drivers/vfio/pci/vfio_pci_intrs.c   | 8 ++--
 drivers/vfio/pci/vfio_pci_private.h | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 4e053be..6e86292 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -322,7 +322,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
 
if (vdev->ctx[vector].trigger) {
free_irq(irq, vdev->ctx[vector].trigger);
-   /* irq_bypass_unregister_producer(); */
+   irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL;
@@ -364,7 +364,11 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return ret;
}
 
-   /* irq_bypass_register_producer(); */
+   INIT_LIST_HEAD(&vdev->ctx[vector].producer.node);
+   vdev->ctx[vector].producer.token = trigger;
+   vdev->ctx[vector].producer.irq = irq;
+   ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+   WARN_ON(ret);
 
vdev->ctx[vector].trigger = trigger;
 
diff --git a/drivers/vfio/pci/vfio_pci_private.h 
b/drivers/vfio/pci/vfio_pci_private.h
index ae0e1b4..0e7394f 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -13,6 +13,7 @@
 
 #include 
 #include 
+#include 
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -29,6 +30,7 @@ struct vfio_pci_irq_ctx {
struct virqfd   *mask;
char*name;
boolmasked;
+   struct irq_bypass_producer  producer;
 };
 
 struct vfio_pci_device {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v1 0/5] irq bypass interface implementation for VT-d Posted-interrupts

2015-07-09 Thread Feng Wu
This series is based on Alex and Eric's irq bypass manager framework. To
make things clear, I am only sending out the patches related to the irq
bypass manager. The purpose here is to show how certain callbacks are used
in VT-d PI and to help improve the irqbypass manager itself.

Feng Wu (5):
  vfio: Register/unregister irq_bypass_producer
  KVM: x86: Update IRTE for posted-interrupts
  KVM: Add pointer to 'struct irq_bypass_producer' in 'kvm_kernel_irqfd'
  KVM: x86: Add arch specific routines for irqbypass manager
  Call irqbypass update routine when updating irqfd

 arch/x86/kvm/Kconfig|   1 +
 arch/x86/kvm/x86.c  | 128 
 drivers/vfio/pci/vfio_pci_intrs.c   |   8 ++-
 drivers/vfio/pci/vfio_pci_private.h |   2 +
 include/linux/kvm_irqfd.h   |   1 +
 virt/kvm/eventfd.c  |   4 +-
 6 files changed, 141 insertions(+), 3 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v1 3/5] KVM: Add pointer to 'struct irq_bypass_producer' in 'kvm_kernel_irqfd'

2015-07-09 Thread Feng Wu
Add a pointer to struct irq_bypass_producer so we can get the producer
information from the consumer side.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_irqfd.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 3c0bd07..0c1de05 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -65,6 +65,7 @@ struct kvm_kernel_irqfd {
poll_table pt;
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
+   struct irq_bypass_producer *producer;
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v1 5/5] Call irqbypass update routine when updating irqfd

2015-07-09 Thread Feng Wu
Call the irq bypass consumer's update routine when updating an irqfd,
so that the IRTE for Intel posted-interrupts can be updated.

Signed-off-by: Feng Wu 
---
 virt/kvm/eventfd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a32cf6c..1226835 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -570,8 +570,10 @@ void kvm_irq_routing_update(struct kvm *kvm)
 
spin_lock_irq(&kvm->irqfds.lock);
 
-   list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+   list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
irqfd_update(kvm, irqfd);
+   irqfd->consumer.update(&irqfd->consumer);
+   }
 
spin_unlock_irq(&kvm->irqfds.lock);
 }
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v1 2/5] KVM: x86: Update IRTE for posted-interrupts

2015-07-09 Thread Feng Wu
This patch adds the routine to update the IRTE for posted-interrupts
when the guest changes the interrupt configuration.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/x86.c | 73 ++
 1 file changed, 73 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 26eaeb5..d81ac02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
 #include  /* Ugh! */
 #include 
 #include 
+#include 
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -7950,6 +7951,78 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+/*
+ * kvm_arch_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via /proc/irq or uses
+* irqbalance to make the interrupts single-CPU.
+*
+* We will support full lowest-priority interrupt later.
+*
+*/
+
+   kvm_set_msi_irq(e, &irq);
+   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (set)
+   ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+   else {
+   /* suppress notification event before unposting */
+   kvm_x86_ops->pi_set_sn(vcpu);
+   ret = irq_set_vcpu_affinity(host_irq, NULL);
+   kvm_x86_ops->pi_clear_sn(vcpu);
+   }
+
+   if (ret < 0) {
+   printk(KERN_INFO "%s: failed to update PI IRTE\n",
+   __func__);
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+   return ret;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC v1 4/5] KVM: x86: Add arch specific routines for irqbypass manager

2015-07-09 Thread Feng Wu
Add the following x86-specific routines for the irqbypass manager:

- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop
- kvm_arch_irq_bypass_resume
- kvm_arch_irq_bypass_update

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/Kconfig |  1 +
 arch/x86/kvm/x86.c   | 55 
 2 files changed, 56 insertions(+)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 22f6fcb..ae68f2a 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -62,6 +62,7 @@ config KVM_INTEL
# for perf_guest_get_msrs():
depends on CPU_SUP_INTEL
select IRQ_BYPASS_MANAGER
+   select HAVE_KVM_IRQ_BYPASS
---help---
  Provides support for KVM on Intel processors equipped with the VT
  extensions.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d81ac02..a59f7e3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -49,6 +49,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #define CREATE_TRACE_POINTS
@@ -8023,6 +8025,59 @@ out:
return ret;
 }
 
+void kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd->producer = prod;
+
+   ret = kvm_arch_update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 1);
+   WARN_ON(ret);
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   irqfd->producer = NULL;
+
+   /*
+* When producer of consumer is unregistered, we change back to
+* remapped mode, so we can re-use the current implementation
+* when the irq is masked/disabled or the consumer side (KVM
+* in this case) doesn't want to receive the interrupts.
+*/
+   ret = kvm_arch_update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+   WARN_ON(ret);
+}
+
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
+{
+}
+
+void kvm_arch_irq_bypass_resume(struct irq_bypass_consumer *cons)
+{
+}
+
+void kvm_arch_irq_bypass_update(struct irq_bypass_consumer *cons)
+{
+   int ret;
+   struct kvm_kernel_irqfd *irqfd =
+   container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+   BUG_ON(!irqfd->producer);
+
+   ret = kvm_arch_update_pi_irte(irqfd->kvm, irqfd->producer->irq,
+ irqfd->gsi, 1);
+   WARN_ON(ret);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 06/19] KVM: Make struct kvm_irq_routing_table accessible

2015-07-13 Thread Feng Wu
Move struct kvm_irq_routing_table from irqchip.c to kvm_host.h,
so we can use it outside of irqchip.c.

Signed-off-by: Feng Wu 
---
 include/linux/kvm_host.h | 15 +++
 virt/kvm/irqchip.c   | 11 ---
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad45054..f591f7c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -321,6 +321,21 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+
+struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+   struct kvm_kernel_irq_routing_entry *rt_entries;
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
+};
+
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 1d56a90..bac3b52 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,17 +31,6 @@
 #include 
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-   struct kvm_kernel_irq_routing_entry *rt_entries;
-   u32 nr_rt_entries;
-   /*
-* Array indexed by gsi. Each entry contains list of irq chips
-* the gsi is connected to.
-*/
-   struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 13/19] KVM: x86: Update IRTE for posted-interrupts

2015-07-13 Thread Feng Wu
This patch adds a routine to update the IRTE for posted-interrupts
when the guest changes the interrupt configuration.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/x86.c | 73 ++
 1 file changed, 73 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 26eaeb5..d81ac02 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -63,6 +63,7 @@
 #include  /* Ugh! */
 #include 
 #include 
+#include 
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -7950,6 +7951,78 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+/*
+ * kvm_arch_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int kvm_arch_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq, bool set)
+{
+   struct kvm_kernel_irq_routing_entry *e;
+   struct kvm_irq_routing_table *irq_rt;
+   struct kvm_lapic_irq irq;
+   struct kvm_vcpu *vcpu;
+   struct vcpu_data vcpu_info;
+   int idx, ret = -EINVAL;
+
+   if (!irq_remapping_cap(IRQ_POSTING_CAP))
+   return 0;
+
+   idx = srcu_read_lock(&kvm->irq_srcu);
+   irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+   BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+   hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+   if (e->type != KVM_IRQ_ROUTING_MSI)
+   continue;
+   /*
+* VT-d PI cannot support posting multicast/broadcast
+* interrupts to a VCPU, we still use interrupt remapping
+* for these kind of interrupts.
+*
+* For lowest-priority interrupts, we only support
+* those with single CPU as the destination, e.g. user
+* configures the interrupts via /proc/irq or uses
+* irqbalance to make the interrupts single-CPU.
+*
+* We will support full lowest-priority interrupt later.
+*
+*/
+
+   kvm_set_msi_irq(e, &irq);
+   if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+   continue;
+
+   vcpu_info.pi_desc_addr = kvm_x86_ops->get_pi_desc_addr(vcpu);
+   vcpu_info.vector = irq.vector;
+
+   if (set)
+   ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+   else {
+   /* suppress notification event before unposting */
+   kvm_x86_ops->pi_set_sn(vcpu);
+   ret = irq_set_vcpu_affinity(host_irq, NULL);
+   kvm_x86_ops->pi_clear_sn(vcpu);
+   }
+
+   if (ret < 0) {
+   printk(KERN_INFO "%s: failed to update PI IRTE\n",
+   __func__);
+   goto out;
+   }
+   }
+
+   ret = 0;
+out:
+   srcu_read_unlock(&kvm->irq_srcu, idx);
+   return ret;
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[v5 19/19] KVM: Warn if 'SN' is set during posting interrupts by software

2015-07-13 Thread Feng Wu
Currently, we don't support urgent interrupts; all interrupts
are treated as non-urgent, so we cannot post interrupts
when 'SN' is set.

If the vcpu is in guest mode, it cannot have been scheduled out,
and being scheduled out is currently the only case in which SN is
set, so warn if SN is set.

Signed-off-by: Feng Wu 
---
 arch/x86/kvm/vmx.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cecd018..d4d5abc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4484,6 +4484,22 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   /*
+* Currently, we don't support urgent interrupt,
+* all interrupts are recognized as non-urgent
+* interrupt, so we cannot post interrupts when
+* 'SN' is set.
+*
+* If the vcpu is in guest mode, it means it is
+* running instead of being scheduled out and
+* waiting in the run queue, and that's the only
+* case when 'SN' is set currently, warning if
+* 'SN' is set.
+*/
+   WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
return true;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   3   >