Amit Shah wrote:
> There are a couple of things here that might need some error handling:
> 
> * On Tuesday 26 August 2008 14:25:35 Amit Shah wrote:
>> From: Ben-Ami Yassour <[EMAIL PROTECTED]>
>> 
>> Based on a patch by: Kay, Allen M <[EMAIL PROTECTED]>
>> 
>> This patch enables PCI device assignment based on VT-d support.
>> When a device is assigned to the guest, the guest memory is pinned
>> and 
>> the mapping is updated in the VT-d IOMMU.
>> 
>> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
>> and also control enable/disable from userspace]
>> 
>> Signed-off-by: Kay, Allen M <[EMAIL PROTECTED]>
>> Signed-off-by: Weidong Han <[EMAIL PROTECTED]>
>> Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]>
>> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> 
> 
>> +#include <linux/list.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/pci.h>
>> +#include <linux/dmar.h>
>> +#include <linux/intel-iommu.h>
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +
>> +int kvm_iommu_map_pages(struct kvm *kvm,
>> +                      gfn_t base_gfn, unsigned long npages)
>> +{
>> +    gfn_t gfn = base_gfn;
>> +    pfn_t pfn;
>> +    int i, rc;
>> +    struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
>> +    /* check if iommu exists and in use */
>> +    if (!domain)
>> +            return 0;
>> +
>> +    for (i = 0; i < npages; i++) {
>> +            /* check if already mapped */
>> +            pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> +                                                 gfn_to_gpa(gfn));
>> +            if (pfn && !is_mmio_pfn(pfn))
>> +                    continue;
>> +
>> +            pfn = gfn_to_pfn(kvm, gfn);
>> +            if (!is_mmio_pfn(pfn)) {
>> +                    rc = intel_iommu_page_mapping(domain,
>> +                                                  gfn_to_gpa(gfn),
>> +                                                  pfn_to_hpa(pfn),
>> +                                                  PAGE_SIZE,
>> +                                                  DMA_PTE_READ |
>> +                                                  DMA_PTE_WRITE);
>> +                    if (rc) {
>> +                            kvm_release_pfn_clean(pfn);
>> +                            printk(KERN_DEBUG "kvm_iommu_map_pages:"
>> +                                   "iommu failed to map pfn=%lx\n",
pfn);
>> +                            return rc;
>> +                    }
>> +            } else {
>> +                    printk(KERN_DEBUG "kvm_iommu_map_page:"
>> +                           "invalid pfn=%lx\n", pfn);
>> +                    return 0;
>> +            }
> 
> In the error case, this function should itself call unmap_pages so
> that either all pages are mapped or none are. Also makes it easier to
> bail out in the two places this function gets called.
> 

Good catch. Will fix it.

>> +
>> +            gfn++;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int kvm_iommu_map_memslots(struct kvm *kvm) +{
>> +    int i, rc;
>> +
>> +    down_read(&kvm->slots_lock);
>> +    for (i = 0; i < kvm->nmemslots; i++) {
>> +            rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> +                                     kvm->memslots[i].npages);
>> +            if (rc) {
>> +                    up_read(&kvm->slots_lock);
>> +                    return rc;
>> +            }
>> +    }
>> +    up_read(&kvm->slots_lock);
>> +    return 0;
>> +}
>> +
>> +int kvm_iommu_map_guest(struct kvm *kvm,
>> +                    struct kvm_assigned_dev_kernel *assigned_dev)
>> +{
>> +    struct pci_dev *pdev = NULL;
>> +    int rc;
>> +
>> +    if (!intel_iommu_found()) {
>> +            printk(KERN_ERR "intel iommu not found\n");
>> +            return -ENODEV;
>> +    }
>> +
>> +    printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
>> +           assigned_dev->host_busnr,
>> +           PCI_SLOT(assigned_dev->host_devfn),
>> +           PCI_FUNC(assigned_dev->host_devfn));
>> +
>> +    pdev = assigned_dev->dev;
>> +
>> +    if (pdev == NULL) {
>> +            if (kvm->arch.intel_iommu_domain) {
>> +
intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
>> +                    kvm->arch.intel_iommu_domain = NULL;
>> +            }
>> +            return -ENODEV;
>> +    }
>> +
>> +    kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> 
> check if we really got the domain

Yes, a check is needed here.

> 
>> +
>> +    rc = kvm_iommu_map_memslots(kvm);
>> +    if (rc)
>> +            goto out_unmap;
>> +
>> +    intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
>> +                           pdev->bus->number, pdev->devfn);
>> +
>> +    rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
>> +                                     pdev);
> 
> This function name (as Mark points out) doesn't make much sense; can
> this be changed?

This function name is kept consistent with the original name in the VT-d driver. Do
you want to add a verb to the name? 

> 
>> +    if (rc) {
>> +            printk(KERN_ERR "Domain context map for %s failed", +

>> pci_name(pdev)); +           goto out_unmap;
>> +    }
>> +    return 0;
>> +
>> +out_unmap:
>> +    kvm_iommu_unmap_memslots(kvm);
>> +    return rc;
>> +}
>> +
>> +static void kvm_iommu_put_pages(struct kvm *kvm,
>> +                           gfn_t base_gfn, unsigned long npages)
>> +{
>> +    gfn_t gfn = base_gfn;
>> +    pfn_t pfn;
>> +    struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
int i;
>> +
>> +    for (i = 0; i < npages; i++) {
>> +            pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> +                                                 gfn_to_gpa(gfn));
>> +            kvm_release_pfn_clean(pfn);
>> +            gfn++;
>> +    }
>> +}
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{
>> +    int i;
>> +    down_read(&kvm->slots_lock);
>> +    for (i = 0; i < kvm->nmemslots; i++) {
>> +            kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
>> +                                kvm->memslots[i].npages);
>> +    }
>> +    up_read(&kvm->slots_lock);
>> +
>> +    return 0;
>> +}
>> +
>> +int kvm_iommu_unmap_guest(struct kvm *kvm)
>> +{
>> +    struct kvm_assigned_dev_kernel *entry;
>> +    struct pci_dev *pdev = NULL;
>> +    struct dmar_domain *domain = kvm->arch.intel_iommu_domain; +
>> +    /* check if iommu exists and in use */
>> +    if (!domain)
>> +            return 0;
>> +
>> +    list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
>> +            printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", +

>> entry->host_busnr, +                PCI_SLOT(entry->host_devfn),
>> +                   PCI_FUNC(entry->host_devfn));
>> +
>> +            for_each_pci_dev(pdev) {
>> +                    if ((pdev->bus->number == entry->host_busnr) &&
>> +                        (pdev->devfn == entry->host_devfn))
>> +                            break;
>> +            }
> 
> We store the PCI dev in entry->dev; no need to scan this entire list.

Yes, it's not necessary.

> 
>> +
>> +            if (pdev == NULL)
>> +                    return -ENODEV;
>> +
>> +            /* detach kvm dmar domain */
>> +            intel_iommu_detach_dev(domain,
>> +                                   pdev->bus->number, pdev->devfn);
>> +    }
>> +    kvm_iommu_unmap_memslots(kvm);
>> +    intel_iommu_domain_exit(domain);
>> +    return 0;
>> +}
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index a18aaad..b703890 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>>  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>>  void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>> 
>> +#ifdef CONFIG_DMAR
>> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
>> +                    unsigned long npages); +int
kvm_iommu_map_guest(struct kvm *kvm,
>> +                    struct kvm_assigned_dev_kernel *assigned_dev);
>> +int kvm_iommu_unmap_guest(struct kvm *kvm);
>> +#else /* CONFIG_DMAR */
>> +static inline int kvm_iommu_map_pages(struct kvm *kvm, +

>> gfn_t base_gfn, +                                  unsigned long
npages)
>> +{
>> +    return 0;
>> +}
>> +
>> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
>> +                                  struct kvm_assigned_dev_kernel
>> +                                  *assigned_dev)
>> +{
>> +    return -ENODEV;
>> +}
>> +
>> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{
>> +    return 0;
>> +}
>> +#endif /* CONFIG_DMAR */
>> +
>>  static inline void kvm_guest_enter(void)
>>  {
>>      account_system_vtime(current);
>> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>>      return (gpa_t)gfn << PAGE_SHIFT;
>>  }
>> 
>> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
>> +{
>> +    return (hpa_t)pfn << PAGE_SHIFT;
>> +}
>> +
> 
> This can be a separate patch.
> 
>>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)  {
>>      set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 0309571..191bfe1 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -41,6 +41,7 @@
>>  #include <linux/pagemap.h>
>>  #include <linux/mman.h>
>>  #include <linux/swap.h>
>> +#include <linux/intel-iommu.h>
>> 
>>  #include <asm/processor.h>
>>  #include <asm/io.h>
>> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>>      return likely(n >= 0 && n < KVM_MAX_VCPUS);
>>  }
>> 
>> -static inline int is_mmio_pfn(pfn_t pfn)
>> +inline int is_mmio_pfn(pfn_t pfn)
>>  {
>>      if (pfn_valid(pfn))
>>              return PageReserved(pfn_to_page(pfn));
>> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
>> 
>>      kvm_free_physmem_slot(&old, &new);
>> +
>> +    /* map the pages in iommu page table */
>> +    r = kvm_iommu_map_pages(kvm, base_gfn, npages);
>> +    if (r)
>> +            goto out_free;
> 
> Doing the unmapping in the map function will mean we don't have to
> check for error return values here.
> 
>> +
>>      return 0;
>> 
>>  out_free:

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to