I think Andrew's comment is correct. I/O NUMA is a separate matter from the
requesting domain's NUMA placement, and IOMMU structures should be allocated
based on the proximity of the IOMMU itself.
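For illustration, a minimal standalone sketch of the lookup in question (not
the actual Xen code; the struct layouts and pxm_to_node() below are simplified
stand-ins for the real definitions):

    #include <stdio.h>

    /* Simplified stand-ins for the real Xen/ACPI definitions. */
    #define NUMA_NO_NODE 0xffU

    struct acpi_rhsa_unit {
        unsigned int proximity_domain;   /* PXM reported by firmware */
    };

    struct acpi_drhd_unit {
        struct acpi_rhsa_unit *rhsa;     /* NULL if firmware gave no RHSA */
    };

    /* Stand-in for Xen's pxm_to_node(); identity mapping for the sketch. */
    static unsigned int pxm_to_node(unsigned int pxm)
    {
        return pxm;
    }

    /*
     * The node to allocate IOMMU structures from is a property of the
     * IOMMU (via the RHSA entry tied to its DRHD), not of any guest
     * domain that happens to trigger the allocation.
     */
    static unsigned int iommu_alloc_node(const struct acpi_drhd_unit *drhd)
    {
        return drhd->rhsa ? pxm_to_node(drhd->rhsa->proximity_domain)
                          : NUMA_NO_NODE;
    }

    int main(void)
    {
        struct acpi_rhsa_unit rhsa = { .proximity_domain = 1 };
        struct acpi_drhd_unit drhd = { .rhsa = &rhsa };

        printf("IOMMU allocations should target node %u\n",
               iommu_alloc_node(&drhd));
        return 0;
    }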

Thanks
Kevin

From: Andrew Cooper [mailto:andrew.coop...@citrix.com]
Sent: Friday, March 06, 2015 1:09 AM
To: Jan Beulich; xen-devel
Cc: Zhang, Yang Z; Dario Faggioli; Tian, Kevin
Subject: Re: [Xen-devel] [PATCH 4/5] VT-d: widen NUMA nodes to be allocated from

On 26/02/15 13:55, Jan Beulich wrote:

Signed-off-by: Jan Beulich <jbeul...@suse.com>

I believe that this change is incorrect.

--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -67,7 +67,8 @@ unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *, unsigned int npages,
+                        struct domain *);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -739,7 +739,8 @@ int enable_intremap(struct iommu *iommu,
     if ( ir_ctrl->iremap_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR,
+                                                    NULL);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -185,7 +185,8 @@ void iommu_flush_cache_page(void *addr,
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned int npages,
+                        struct domain *d)
 {
     struct acpi_rhsa_unit *rhsa;
     struct page_info *pg, *cur_pg;
@@ -195,10 +196,10 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
 
     rhsa = drhd_to_rhsa(drhd);
     if ( rhsa )
-        node =  pxm_to_node(rhsa->proximity_domain);
+        node = pxm_to_node(rhsa->proximity_domain);
 
-    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
-                             (node == NUMA_NO_NODE) ? 0 : MEMF_node(node));
+    pg = alloc_domheap_pages(d, get_order_from_pages(npages),
+                             MEMF_node(node) | MEMF_no_owner);
     if ( !pg )
         return 0;
 
@@ -223,7 +224,7 @@ void free_pgtable_maddr(u64 maddr)
 }
 
 /* context entry handling */
-static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
+static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus, struct domain *d)
 {
     struct acpi_drhd_unit *drhd;
     struct root_entry *root, *root_entries;
@@ -235,7 +236,7 @@ static u64 bus_to_context_maddr(struct i
     if ( !root_present(*root) )
     {
         drhd = iommu_to_drhd(iommu);
-        maddr = alloc_pgtable_maddr(drhd, 1);
+        maddr = alloc_pgtable_maddr(drhd, 1, d);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -271,7 +272,9 @@ static u64 addr_to_dma_page_maddr(struct
          */
         pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
         drhd = acpi_find_matched_drhd_unit(pdev);
-        if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
+        if ( !alloc ||
+             ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1,
+                                                        domain)) == 0) )

This allocation should be based on the proximity information of the iommu, not 
of the requesting domain.
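For reference, that is essentially what the pre-patch code quoted above
already does: the node is derived solely from the IOMMU's DRHD, with no
reference to the requesting domain (fragment repeated from the hunk quoted
earlier, purely for illustration):

    /* Locality of the IOMMU, from its ACPI RHSA entry if one exists. */
    rhsa = drhd_to_rhsa(drhd);
    if ( rhsa )
        node = pxm_to_node(rhsa->proximity_domain);

    /* Allocate anonymously on the IOMMU's node; the domain plays no part. */
    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
                             (node == NUMA_NO_NODE) ? 0 : MEMF_node(node));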
             goto out;
     }
 
@@ -289,7 +292,7 @@ static u64 addr_to_dma_page_maddr(struct
 
             pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
             drhd = acpi_find_matched_drhd_unit(pdev);
-            pte_maddr = alloc_pgtable_maddr(drhd, 1);
+            pte_maddr = alloc_pgtable_maddr(drhd, 1, domain);

As should this.

I don't see a need to extend the prototype of alloc_pgtable_maddr().  The drhd 
parameter contains all relevant information concerning proximity.
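Concretely (an illustrative sketch only, keeping the unsigned int change to
npages but dropping the extra struct domain * parameter), the declaration
could remain:

    u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned int npages);

with every call site above keeping its current two-argument form, e.g.:

    maddr = alloc_pgtable_maddr(drhd, 1);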

~Andrew

             if ( !pte_maddr )
                 break;
 
@@ -1119,7 +1122,7 @@ int __init iommu_alloc(struct acpi_drhd_
     iommu->intel->drhd = drhd;
     drhd->iommu = iommu;
 
-    if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) )
+    if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1, NULL)) )
         return -ENOMEM;
 
     iommu->reg = ioremap(drhd->address, PAGE_SIZE);
@@ -1270,7 +1273,7 @@ int domain_context_mapping_one(
 
     ASSERT(spin_is_locked(&pcidevs_lock));
     spin_lock(&iommu->lock);
-    maddr = bus_to_context_maddr(iommu, bus);
+    maddr = bus_to_context_maddr(iommu, bus, domain);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
 
@@ -1495,7 +1498,7 @@ int domain_context_unmap_one(
     ASSERT(spin_is_locked(&pcidevs_lock));
     spin_lock(&iommu->lock);
 
-    maddr = bus_to_context_maddr(iommu, bus);
+    maddr = bus_to_context_maddr(iommu, bus, domain);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
 
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -374,7 +374,8 @@ int enable_qinval(struct iommu *iommu)
     if ( qi_ctrl->qinval_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR,
+                                                    NULL);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel