Amit Shah wrote:
> There are a couple of things here that might need some error handling:
>
> * On Tuesday 26 August 2008 14:25:35 Amit Shah wrote:
>> From: Ben-Ami Yassour <[EMAIL PROTECTED]>
>>
>> Based on a patch by: Kay, Allen M <[EMAIL PROTECTED]>
>>
>> This patch enables PCI device assignment based on VT-d support.
>> When a device is assigned to the guest, the guest memory is pinned
>> and
>> the mapping is updated in the VT-d IOMMU.
>>
>> [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
>> and also control enable/disable from userspace]
>>
>> Signed-off-by: Kay, Allen M <[EMAIL PROTECTED]>
>> Signed-off-by: Weidong Han <[EMAIL PROTECTED]>
>> Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]>
>> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
>
>
>> +#include <linux/list.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/pci.h>
>> +#include <linux/dmar.h>
>> +#include <linux/intel-iommu.h>
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
>> +
>> +int kvm_iommu_map_pages(struct kvm *kvm,
>> + gfn_t base_gfn, unsigned long npages)
>> +{
>> + gfn_t gfn = base_gfn;
>> + pfn_t pfn;
>> + int i, rc;
>> + struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
>> +
>> + /* check if iommu exists and in use */
>> + if (!domain)
>> + return 0;
>> +
>> + for (i = 0; i < npages; i++) {
>> + /* check if already mapped */
>> + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> + gfn_to_gpa(gfn));
>> + if (pfn && !is_mmio_pfn(pfn))
>> + continue;
>> +
>> + pfn = gfn_to_pfn(kvm, gfn);
>> + if (!is_mmio_pfn(pfn)) {
>> + rc = intel_iommu_page_mapping(domain,
>> + gfn_to_gpa(gfn),
>> + pfn_to_hpa(pfn),
>> + PAGE_SIZE,
>> + DMA_PTE_READ |
>> + DMA_PTE_WRITE);
>> + if (rc) {
>> + kvm_release_pfn_clean(pfn);
>> + printk(KERN_DEBUG "kvm_iommu_map_pages:"
>> + "iommu failed to map pfn=%lx\n",
>> + pfn);
>> + return rc;
>> + }
>> + } else {
>> + printk(KERN_DEBUG "kvm_iommu_map_page:"
>> + "invalid pfn=%lx\n", pfn);
>> + return 0;
>> + }
>
> In the error case, this function should itself call unmap_pages so
> that either all pages are mapped or none are. Also makes it easier to
> bail out in the two places this function gets called.
>
Good catch. Will fix it.
>> +
>> + gfn++;
>> + }
>> + return 0;
>> +}
>> +
>> +static int kvm_iommu_map_memslots(struct kvm *kvm)
>> +{
>> + int i, rc;
>> +
>> + down_read(&kvm->slots_lock);
>> + for (i = 0; i < kvm->nmemslots; i++) {
>> + rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
>> + kvm->memslots[i].npages);
>> + if (rc) {
>> + up_read(&kvm->slots_lock);
>> + return rc;
>> + }
>> + }
>> + up_read(&kvm->slots_lock);
>> + return 0;
>> +}
>> +
>> +int kvm_iommu_map_guest(struct kvm *kvm,
>> + struct kvm_assigned_dev_kernel *assigned_dev)
>> +{
>> + struct pci_dev *pdev = NULL;
>> + int rc;
>> +
>> + if (!intel_iommu_found()) {
>> + printk(KERN_ERR "intel iommu not found\n");
>> + return -ENODEV;
>> + }
>> +
>> + printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
>> + assigned_dev->host_busnr,
>> + PCI_SLOT(assigned_dev->host_devfn),
>> + PCI_FUNC(assigned_dev->host_devfn));
>> +
>> + pdev = assigned_dev->dev;
>> +
>> + if (pdev == NULL) {
>> + if (kvm->arch.intel_iommu_domain) {
>> + intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
>> + kvm->arch.intel_iommu_domain = NULL;
>> + }
>> + return -ENODEV;
>> + }
>> +
>> + kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
>
> check if we really got the domain
Yes, a check is needed here.
>
>> +
>> + rc = kvm_iommu_map_memslots(kvm);
>> + if (rc)
>> + goto out_unmap;
>> +
>> + intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
>> + pdev->bus->number, pdev->devfn);
>> +
>> + rc = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
>> + pdev);
>
> This function name (as Mark points out) doesn't make much sense; can
> this be changed?
This function name is kept consistent with the original name in the VT-d driver. Do
you want to add a verb to the name?
>
>> + if (rc) {
>> + printk(KERN_ERR "Domain context map for %s failed",
>> + pci_name(pdev));
>> + goto out_unmap;
>> + }
>> + return 0;
>> +
>> +out_unmap:
>> + kvm_iommu_unmap_memslots(kvm);
>> + return rc;
>> +}
>> +
>> +static void kvm_iommu_put_pages(struct kvm *kvm,
>> + gfn_t base_gfn, unsigned long npages)
>> +{
>> + gfn_t gfn = base_gfn;
>> + pfn_t pfn;
>> + struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
>> + int i;
>> +
>> + for (i = 0; i < npages; i++) {
>> + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
>> + gfn_to_gpa(gfn));
>> + kvm_release_pfn_clean(pfn);
>> + gfn++;
>> + }
>> +}
>> +
>> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
>> +{
>> + int i;
>> + down_read(&kvm->slots_lock);
>> + for (i = 0; i < kvm->nmemslots; i++) {
>> + kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
>> + kvm->memslots[i].npages);
>> + }
>> + up_read(&kvm->slots_lock);
>> +
>> + return 0;
>> +}
>> +
>> +int kvm_iommu_unmap_guest(struct kvm *kvm)
>> +{
>> + struct kvm_assigned_dev_kernel *entry;
>> + struct pci_dev *pdev = NULL;
>> + struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
>> +
>> + /* check if iommu exists and in use */
>> + if (!domain)
>> + return 0;
>> +
>> + list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
>> + printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
>> + entry->host_busnr,
>> + PCI_SLOT(entry->host_devfn),
>> + PCI_FUNC(entry->host_devfn));
>> +
>> + for_each_pci_dev(pdev) {
>> + if ((pdev->bus->number == entry->host_busnr) &&
>> + (pdev->devfn == entry->host_devfn))
>> + break;
>> + }
>
> We store the PCI dev in entry->dev; no need to scan this entire list.
Yes, it's not necessary.
>
>> +
>> + if (pdev == NULL)
>> + return -ENODEV;
>> +
>> + /* detach kvm dmar domain */
>> + intel_iommu_detach_dev(domain,
>> + pdev->bus->number, pdev->devfn);
>> + }
>> + kvm_iommu_unmap_memslots(kvm);
>> + intel_iommu_domain_exit(domain);
>> + return 0;
>> +}
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index a18aaad..b703890 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -285,6 +285,33 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
>> int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
>> void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>>
>> +#ifdef CONFIG_DMAR
>> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
>> + unsigned long npages);
>> +int kvm_iommu_map_guest(struct kvm *kvm,
>> + struct kvm_assigned_dev_kernel *assigned_dev);
>> +int kvm_iommu_unmap_guest(struct kvm *kvm);
>> +#else /* CONFIG_DMAR */
>> +static inline int kvm_iommu_map_pages(struct kvm *kvm,
>> + gfn_t base_gfn,
>> + unsigned long npages)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline int kvm_iommu_map_guest(struct kvm *kvm,
>> + struct kvm_assigned_dev_kernel
>> + *assigned_dev)
>> +{
>> + return -ENODEV;
>> +}
>> +
>> +static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
>> +{
>> + return 0;
>> +}
>> +#endif /* CONFIG_DMAR */
>> +
>> static inline void kvm_guest_enter(void)
>> {
>> account_system_vtime(current);
>> @@ -307,6 +334,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
>> return (gpa_t)gfn << PAGE_SHIFT;
>> }
>>
>> +static inline hpa_t pfn_to_hpa(pfn_t pfn)
>> +{
>> + return (hpa_t)pfn << PAGE_SHIFT;
>> +}
>> +
>
> This can be a separate patch.
>
>> static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) {
>> set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 0309571..191bfe1 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -41,6 +41,7 @@
>> #include <linux/pagemap.h>
>> #include <linux/mman.h>
>> #include <linux/swap.h>
>> +#include <linux/intel-iommu.h>
>>
>> #include <asm/processor.h>
>> #include <asm/io.h>
>> @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
>> return likely(n >= 0 && n < KVM_MAX_VCPUS);
>> }
>>
>> -static inline int is_mmio_pfn(pfn_t pfn)
>> +inline int is_mmio_pfn(pfn_t pfn)
>> {
>> if (pfn_valid(pfn))
>> return PageReserved(pfn_to_page(pfn));
>> @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
>> }
>>
>> kvm_free_physmem_slot(&old, &new);
>> +
>> + /* map the pages in iommu page table */
>> + r = kvm_iommu_map_pages(kvm, base_gfn, npages);
>> + if (r)
>> + goto out_free;
>
> Doing the unmapping in the map function will mean we don't have to
> check for error return values here.
>
>> +
>> return 0;
>>
>> out_free:
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html