2016-07-25 04:32-0500, Suravee Suthikulpanit:
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> @@ -1485,9 +1521,16 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
>       WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
>  
>       entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
> -     if (is_run)
> +     if (is_run) {
>               entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
> -     WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
> +             WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
> +             avic_update_iommu(vcpu, h_physical_id,
> +                               page_to_phys(svm->avic_backing_page), 1);
> +     } else {
> +             avic_update_iommu(vcpu, h_physical_id,
> +                               page_to_phys(svm->avic_backing_page), 0);
> +             WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
> +     }

You need to do the same change twice ... I guess it is time to factor
the code. :)

Wouldn't the following be an improvement in the !is_run path too?

  static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
  {
        struct vcpu_svm *svm = to_svm(vcpu);

        svm->avic_is_running = is_run;

        if (is_run)
                avic_vcpu_load(vcpu, vcpu->cpu);
        else
                avic_vcpu_put(vcpu);
  }
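
Either way, the duplicated IS_RUNNING/IOMMU dance from the hunk above could
become one helper that avic_vcpu_load()/avic_vcpu_put() call -- rough sketch
only, helper name made up, reusing avic_update_iommu() from this patch and
keeping your ordering on the !is_run side:

  static void avic_set_is_running(struct vcpu_svm *svm, int h_physical_id,
                                  bool is_run)
  {
        u64 entry = READ_ONCE(*(svm->avic_physical_id_cache));

        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        if (is_run) {
                /* Set IS_RUNNING first, then let the IOMMU start posting. */
                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
                WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
                avic_update_iommu(&svm->vcpu, h_physical_id,
                                  page_to_phys(svm->avic_backing_page), 1);
        } else {
                /* Stop the IOMMU from posting before clearing IS_RUNNING. */
                avic_update_iommu(&svm->vcpu, h_physical_id,
                                  page_to_phys(svm->avic_backing_page), 0);
                WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
        }
  }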

> +static void svm_pi_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
> +{
> +     bool found = false;
> +     unsigned long flags;
> +     struct amd_iommu_pi_data *cur;
> +
> +     spin_lock_irqsave(&svm->pi_list_lock, flags);
> +     list_for_each_entry(cur, &svm->pi_list, node) {
> +             if (cur->ir_data != pi->ir_data)
> +                     continue;
> +             found = true;

This optimization turned out to be ugly ... sorry.

The pi_list manipulation is hard to understand, IMO, so a comment
explaining why this can't be done without traversing the list and
comparing pi->ir_data would be nice.

Maybe I was a bit confused by reusing amd_iommu_pi_data when all we care
about is a list of cur->ir_data -- can't we have a list of just ir_data?

Allocating the list node in svm_pi_list_add() would make it nicely
symmetrical with svm_pi_list_del() too.
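
E.g. something like this (untested sketch; struct and field names are made
up, and it assumes ir_data is just the opaque pointer handed back by the
IOMMU driver):

  struct svm_ir_node {
        struct list_head node;
        void *ir_data;
  };

  static int svm_pi_list_add(struct vcpu_svm *svm, void *ir_data)
  {
        int ret = 0;
        unsigned long flags;
        struct svm_ir_node *cur;

        spin_lock_irqsave(&svm->pi_list_lock, flags);
        list_for_each_entry(cur, &svm->pi_list, node) {
                if (cur->ir_data == ir_data)
                        goto out;       /* already tracked */
        }

        /* GFP_ATOMIC because we hold a spinlock with interrupts off. */
        cur = kzalloc(sizeof(*cur), GFP_ATOMIC);
        if (!cur) {
                ret = -ENOMEM;
                goto out;
        }
        cur->ir_data = ir_data;
        list_add(&cur->node, &svm->pi_list);
  out:
        spin_unlock_irqrestore(&svm->pi_list_lock, flags);
        return ret;
  }

svm_pi_list_del() would then take the same ir_data pointer, and the caller
never owns a node, which also takes care of the leak mentioned below.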

> +             break;
> +     }
> +     if (!found)
> +             list_add(&pi->node, &svm->pi_list);
> +     spin_unlock_irqrestore(&svm->pi_list_lock, flags);
> +}
> +
> +static void svm_pi_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
> +{
> +     unsigned long flags;
> +     struct amd_iommu_pi_data *cur, *next;
> +
> +     spin_lock_irqsave(&svm->pi_list_lock, flags);
> +     list_for_each_entry_safe(cur, next, &svm->pi_list, node) {

No need to use a _safe loop if you break on the first deletion.
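
i.e. simply (and then the unused 'next' can go):

  list_for_each_entry(cur, &svm->pi_list, node) {
          if (cur->ir_data != pi->ir_data)
                  continue;
          list_del(&cur->node);
          kfree(cur);
          /* safe: we stop iterating right after freeing cur */
          break;
  }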

> +             if (cur->ir_data != pi->ir_data)
> +                     continue;
> +             list_del(&cur->node);
> +             kfree(cur);
> +             break;
> +     }
> +     spin_unlock_irqrestore(&svm->pi_list_lock, flags);
> +}
> +
> +/*
> + * svm_update_pi_irte - set IRTE for Posted-Interrupts
> + *
> + * @kvm: kvm
> + * @host_irq: host irq of the interrupt
> + * @guest_irq: gsi of the interrupt
> + * @set: set or unset PI
> + * returns 0 on success, < 0 on failure
> + */
> +static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
> +                           uint32_t guest_irq, bool set)
> +{
> +     struct kvm_kernel_irq_routing_entry *e;
> +     struct kvm_irq_routing_table *irq_rt;
> +     int idx, ret = -EINVAL;
> +
> +     if (!kvm_arch_has_assigned_device(kvm) ||
> +         !irq_remapping_cap(IRQ_POSTING_CAP))
> +             return 0;
> +
> +     pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
> +              __func__, host_irq, guest_irq, set);
> +
> +     idx = srcu_read_lock(&kvm->irq_srcu);
> +     irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> +     WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
> +
> +     hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> +             struct kvm_lapic_irq irq;
> +             struct vcpu_data vcpu_info;
> +             struct kvm_vcpu *vcpu = NULL;
> +             struct vcpu_svm *svm = NULL;
> +
> +             if (e->type != KVM_IRQ_ROUTING_MSI)
> +                     continue;
> +
> +             /**
> +              * Note:
> +              * The HW cannot support posting multicast/broadcast
> +              * interrupts to a vCPU. So, we still use interrupt
> +              * remapping for these kind of interrupts.
> +              *
> +              * For lowest-priority interrupts, we only support
> +              * those with single CPU as the destination, e.g. user
> +              * configures the interrupts via /proc/irq or uses
> +              * irqbalance to make the interrupts single-CPU.
> +              */
> +             kvm_set_msi_irq(e, &irq);
> +             if (kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> +                     svm = to_svm(vcpu);
> +                     vcpu_info.pi_desc_addr = page_to_phys(svm->avic_backing_page);
> +                     vcpu_info.vector = irq.vector;
> +
> +                     trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
> +                                              vcpu_info.vector,
> +                                              vcpu_info.pi_desc_addr, set);

Trace below the if/else; we might update something in both branches.
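
i.e. moved after the single-vCPU if/else, e.g. (sketch only; the
multicast/broadcast case has no vcpu/vcpu_info to report, so it would need
a guard or dummy values):

  if (vcpu)
          trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
                                   vcpu_info.vector,
                                   vcpu_info.pi_desc_addr, set);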

> +
> +                     pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
> +                              irq.vector);
> +             } else {
> +                     set = false;
> +
> +                     pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
> +                              __func__, irq.vector);
> +             }
> +
> +             /**
> +              * When AVIC is disabled, we fall-back to setup
> +              * IRTE w/ legacy mode
> +              */
> +             if (set && kvm_vcpu_apicv_active(&svm->vcpu)) {
> +                     struct amd_iommu_pi_data *pi_data;
> +
> +                     /**
> +                      * Allocating new amd_iommu_pi_data, which will get
> +                      * add to the per-vcpu pi_list.
> +                      */
> +                     pi_data = kzalloc(sizeof(struct amd_iommu_pi_data),
> +                                       GFP_KERNEL);
> +                     if (!pi_data) {
> +                             ret = -ENOMEM;
> +                             goto out;
> +                     }
> +
> +                     /* Try to enable guest_mode in IRTE */
> +                     pi_data->ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
> +                                                  vcpu->vcpu_id);
> +                     pi_data->vcpu_data = &vcpu_info;
> +                     pi_data->is_guest_mode = true;
> +                     ret = irq_set_vcpu_affinity(host_irq, pi_data);
> +
> +                     /**
> +                      * We save the pointer to pi_data in the struct
> +                      * vcpu_svm so that we can reference to them directly
> +                      * when we update vcpu scheduling information in IOMMU
> +                      * irte.
> +                      */
> +                     if (!ret && pi_data->is_guest_mode)
> +                             svm_pi_list_add(svm, pi_data);

pi_data leaks in the else case.

(Allocating the element in svm_pi_list_add() would solve this.)
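
Or, keeping the current ownership, the minimal fix at this call site would
be (sketch):

  if (!ret && pi_data->is_guest_mode)
          svm_pi_list_add(svm, pi_data);
  else
          kfree(pi_data);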

> +             } else {
> +                     /* Use legacy mode in IRTE */
> +                     struct amd_iommu_pi_data pi;
> +
> +                     /**
> +                      * Here, pi is used to:
> +                      * - Tell IOMMU to use legacy mode for this interrupt.
> +                      * - Retrieve ga_tag of prior interrupt remapping data.
> +                      */
> +                     pi.is_guest_mode = false;
> +                     ret = irq_set_vcpu_affinity(host_irq, &pi);
> +
> +                     /**
> +                      * We need to check if the interrupt was previously
> +                      * setup with the guest_mode by checking if the ga_tag
> +                      * was cached. If so, we need to clean up the per-vcpu
> +                      * pi_list.
> +                      */
> +                     if (!ret && pi.ga_tag) {
> +                             struct kvm_vcpu *vcpu = kvm_get_vcpu_by_id(kvm,
> +                                             AVIC_GATAG_TO_VCPUID(pi.ga_tag));
> +
> +                             if (vcpu)
> +                                     svm_pi_list_del(to_svm(vcpu), &pi);
> +                     }
> +             }
> +
> +             if (ret < 0) {
> +                     pr_err("%s: failed to update PI IRTE\n", __func__);
> +                     goto out;
> +             }
> +     }
> +
> +     ret = 0;
> +out:
> +     srcu_read_unlock(&kvm->irq_srcu, idx);
> +     return ret;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>       struct vcpu_svm *svm = to_svm(vcpu);
