>>> On 28.01.16 at 06:12, <feng...@intel.com> wrote: > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -83,7 +83,140 @@ static int vmx_msr_write_intercept(unsigned int msr, > uint64_t msr_content); > static void vmx_invlpg_intercept(unsigned long vaddr); > static int vmx_vmfunc_intercept(struct cpu_user_regs *regs); > > +/* > + * We maintain a per-CPU linked-list of vCPUs, so in PI wakeup > + * handler we can find which vCPU should be woken up. > + */ > +static DEFINE_PER_CPU(struct list_head, pi_blocked_vcpu); > +static DEFINE_PER_CPU(spinlock_t, pi_blocked_vcpu_lock); > + > uint8_t __read_mostly posted_intr_vector; > +static uint8_t __read_mostly pi_wakeup_vector; > + > +void vmx_pi_per_cpu_init(unsigned int cpu) > +{ > + INIT_LIST_HEAD(&per_cpu(pi_blocked_vcpu, cpu)); > + spin_lock_init(&per_cpu(pi_blocked_vcpu_lock, cpu)); > +} > + > +static void vmx_vcpu_block(struct vcpu *v) > +{ > + unsigned long flags; > + unsigned int dest; > + > + struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
Stray blank line between declarations. > + ASSERT(v->arch.hvm_vmx.pi_block_list_lock == NULL); I think this needs to be done after acquiring the lock. > + /* The vCPU is blocking, we need to add it to one of the per-cpu lists. > */ > + v->arch.hvm_vmx.pi_block_list_lock = > + &per_cpu(pi_blocked_vcpu_lock, v->processor); > + > + spin_lock_irqsave(v->arch.hvm_vmx.pi_block_list_lock, flags); > + list_add_tail(&v->arch.hvm_vmx.pi_blocked_vcpu_list, > + &per_cpu(pi_blocked_vcpu, v->processor)); > + spin_unlock_irqrestore(v->arch.hvm_vmx.pi_block_list_lock, flags); > + > + ASSERT(!pi_test_sn(pi_desc)); > + > + dest = cpu_physical_id(v->processor); > + > + ASSERT(pi_desc->ndst == > + (x2apic_enabled ? dest: MASK_INSR(dest, PI_xAPIC_NDST_MASK))); Missing space before colon. > +static void vmx_pi_switch_from(struct vcpu *v) > +{ > + struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc; > + > + if ( test_bit(_VPF_blocked, &v->pause_flags) ) > + return; > + > + /* > + * The vCPU has been preempted or went to sleep. We don't > + * need to send notification event to a runnable or sleeping > + * vcpu, the interrupt information will be delivered to it > + * before VM-ENTRY when the vcpu is scheduled to run next time. > + */ > + pi_set_sn(pi_desc); > +} The comment confuses me: A runnable vCPU certainly doesn't need waking, but a sleeping one? Why wouldn't an interrupt coming in not need to wake that vCPU, if this interrupt is what it's waiting for? > +static void vmx_pi_do_resume(struct vcpu *v) > +{ > + unsigned long flags; > + spinlock_t *pi_block_list_lock; > + struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc; > + > + ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); > + > + /* > + * Set 'NV' field back to posted_intr_vector, so the > + * Posted-Interrupts can be delivered to the vCPU when > + * it is running in non-root mode. > + */ > + if ( pi_desc->nv != posted_intr_vector ) > + write_atomic(&pi_desc->nv, posted_intr_vector); Perhaps this was discussed before, but I don't recall and now wonder - why inside an if()? This is a simple memory write on x86. > + /* The vCPU is not on any blocking list. */ > + pi_block_list_lock = v->arch.hvm_vmx.pi_block_list_lock; > + if ( pi_block_list_lock == NULL ) > + return; As said before, I think you're missing a barrier here, to prevent the compiler from eliminating the local variable. > +/* This function is called when pcidevs_lock is held */ > +void vmx_pi_hooks_reassign(struct domain *source, struct domain *target) > +{ > + if (!iommu_intpost) Coding style. > + return; > + > + if ( has_hvm_container_domain(source) && > + source->arch.hvm_domain.vmx.vcpu_block && !has_arch_pdevs(source) ) > { Again. > + source->arch.hvm_domain.vmx.vcpu_block = NULL; > + source->arch.hvm_domain.vmx.pi_switch_from = NULL; > + source->arch.hvm_domain.vmx.pi_switch_to = NULL; > + source->arch.hvm_domain.vmx.pi_do_resume = NULL; > + } > + > + if ( has_hvm_container_domain(target) && > + !target->arch.hvm_domain.vmx.vcpu_block && has_arch_pdevs(target) ) > { And again (not going to mention any further ones). > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -2293,6 +2293,8 @@ static int reassign_device_ownership( > pdev->domain = target; > } > > + vmx_pi_hooks_reassign(source, target); > + > return ret; > } I think you need to clear source's hooks here, but target's need to get set ahead of the actual assignment. > --- a/xen/include/asm-x86/hvm/hvm.h > +++ b/xen/include/asm-x86/hvm/hvm.h > @@ -565,6 +565,11 @@ const char *hvm_efer_valid(const struct vcpu *v, > uint64_t value, > signed int cr0_pg); > unsigned long hvm_cr4_guest_reserved_bits(const struct vcpu *v, bool_t > restore); > > +#define arch_vcpu_block(v) ({ \ > + if ( (v)->domain->arch.hvm_domain.vmx.vcpu_block ) \ > + (v)->domain->arch.hvm_domain.vmx.vcpu_block((v)); \ Stray double parentheses. But - why is this a macro all of the sudden anyway? > @@ -160,6 +165,15 @@ struct arch_vmx_struct { > struct page_info *vmwrite_bitmap; > > struct page_info *pml_pg; > + > + struct list_head pi_blocked_vcpu_list; > + > + /* > + * Before it is blocked, vCPU is added to the per-cpu list. > + * VT-d engine can send wakeup notification event to the > + * pCPU and wakeup the related vCPU. > + */ > + spinlock_t *pi_block_list_lock; > }; Wasn't the comment meant to go ahead of both new fields? For the lock pointer alone it doesn't make much sense in the way it's written right now. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel