On 25/03/15 12:31, Feng Wu wrote:
The basic idea here is:
1. When vCPU's state is RUNSTATE_running,
- set 'NV' to 'Notification Vector'.
- Clear 'SN' to accept PI.
- set 'NDST' to the right pCPU.
2. When vCPU's state is RUNSTATE_blocked,
- set 'NV' to 'Wake-up Vector', so we can wake up the
related vCPU when posted-interrupt happens for it.
- Clear 'SN' to accept PI.
3. When vCPU's state is RUNSTATE_runnable/RUNSTATE_offline,
- Set 'SN' to suppress non-urgent interrupts.
(Currently, we only support non-urgent interrupts)
- Set 'NV' back to 'Notification Vector' if needed.
Signed-off-by: Feng Wu <feng...@intel.com>
---
xen/arch/x86/hvm/vmx/vmx.c | 108 +++++++++++++++++++++++++++++++++++++++++++++
xen/common/schedule.c | 3 ++
2 files changed, 111 insertions(+)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b30392c..6323bd6 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1710,6 +1710,113 @@ static void vmx_handle_eoi(u8 vector)
__vmwrite(GUEST_INTR_STATUS, status);
}
+/*
+ * Update the vCPU's Posted-Interrupt Descriptor on runstate transitions:
+ *  - running:          NV = notification vector, SN clear, NDST = new pCPU.
+ *  - blocked:          NV = wakeup vector so VT-d HW can wake the vCPU.
+ *  - runnable/offline: SN set to suppress non-urgent notifications.
+ *
+ * Called from vcpu_runstate_change() *before* v->runstate.state is written,
+ * so v->runstate.state still holds the old state here.
+ *
+ * NB: this relies on a 'pi_block_cpu' field being added to
+ * struct arch_vmx_struct to record which pCPU's wakeup list the vCPU was
+ * put on while blocked.
+ */
+static void vmx_pi_desc_update(struct vcpu *v, int new_state)
+{
+    struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
+    struct pi_desc old, new;
+    int old_state = v->runstate.state;
+    unsigned int pcpu;
+    unsigned long flags;
+
+    if ( !iommu_intpost )
+        return;
+
+    switch ( new_state )
+    {
+    case RUNSTATE_runnable:
+    case RUNSTATE_offline:
+        /*
+         * We don't need to send a notification event to a non-running
+         * vcpu: the interrupt information will be delivered to it before
+         * VM-ENTRY when the vcpu is scheduled to run next time.
+         */
+        pi_set_sn(pi_desc);
+
+        /*
+         * If the state is transferred from RUNSTATE_blocked,
+         * we should set the 'NV' field back to posted_intr_vector,
+         * so the Posted-Interrupts can be delivered to the vCPU
+         * by VT-d HW after it is scheduled to run.
+         */
+        if ( old_state == RUNSTATE_blocked )
+        {
+            do {
+                old.control = new.control = pi_desc->control;
+                new.nv = posted_intr_vector;
+            } while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                      != old.control );
+
+            /*
+             * Remove the vCPU from the wakeup list it joined when it
+             * blocked.  The scheduler may have changed v->processor since
+             * then, so use the pCPU recorded at blocking time for both the
+             * lock and the list; reading v->processor here (and re-reading
+             * it at unlock time) races with vCPU migration, as reported by
+             * Andrew Cooper during review.
+             */
+            pcpu = v->arch.hvm_vmx.pi_block_cpu;
+            spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, pcpu),
+                              flags);
+            list_del(&v->blocked_vcpu_list);
+            spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, pcpu),
+                                   flags);
+        }
+        break;
+
+    case RUNSTATE_blocked:
+        /*
+         * The vCPU is blocked on the wait queue.  Snapshot v->processor
+         * once, record it in pi_block_cpu, and add the vCPU to that pCPU's
+         * wakeup list (the destination of the wake-up notification event).
+         * The recorded pCPU guarantees the entry is later removed under
+         * the same lock it was added under, even if the scheduler migrates
+         * the vCPU while it is blocked.
+         */
+        pcpu = v->processor;
+        spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, pcpu), flags);
+        v->arch.hvm_vmx.pi_block_cpu = pcpu;
+        list_add_tail(&v->blocked_vcpu_list,
+                      &per_cpu(blocked_vcpu_on_cpu, pcpu));
+        spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, pcpu),
+                               flags);
+
+        do {
+            old.control = new.control = pi_desc->control;
+
+            /*
+             * We should not block the vCPU if an interrupt was already
+             * posted for it: schedule a wakeup instead.  The vCPU stays
+             * on the wakeup list here; the subsequent blocked -> runnable
+             * transition removes it again via the path above.
+             */
+            if ( pi_test_on(&old) == 1 )
+            {
+                tasklet_schedule(&v->vcpu_wakeup_tasklet);
+                return;
+            }
+
+            pi_clear_sn(&new);
+            new.nv = pi_wakeup_vector;
+        } while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                  != old.control );
+        break;
+
+    case RUNSTATE_running:
+        /*
+         * SN is expected to have been set on the preceding transition to
+         * RUNSTATE_runnable.  NOTE(review): confirm a direct
+         * blocked -> running transition cannot occur, as that path clears
+         * SN and would trip this assertion.
+         */
+        ASSERT( pi_test_sn(pi_desc) == 1 );
+
+        do {
+            old.control = new.control = pi_desc->control;
+
+            /* Point 'NDST' at the pCPU this vCPU is about to run on. */
+            if ( x2apic_enabled )
+                new.ndst = cpu_physical_id(v->processor);
+            else
+                new.ndst = (cpu_physical_id(v->processor) << 8) & 0xFF00;
+
+            pi_clear_sn(&new);
+        } while ( cmpxchg(&pi_desc->control, old.control, new.control)
+                  != old.control );
+        break;
+
+    default:
+        break;
+    }
+}
+
void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx)
@@ -1795,6 +1902,7 @@ static struct hvm_function_table __initdata
vmx_function_table = {
.process_isr = vmx_process_isr,
.deliver_posted_intr = vmx_deliver_posted_intr,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .pi_desc_update = vmx_pi_desc_update,
.handle_eoi = vmx_handle_eoi,
.nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
.hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index ef79847..acf3186 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -157,6 +157,9 @@ static inline void vcpu_runstate_change(
v->runstate.state_entry_time = new_entry_time;
}
+ if ( is_hvm_vcpu(v) && hvm_funcs.pi_desc_update )
+ hvm_funcs.pi_desc_update(v, new_state);
+
v->runstate.state = new_state;
}
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel