After reducing the amount of MMIO performed by the IOMMU during
operation, perf data shows that flushing the TLB for all protection
domains during DMA unmapping is a performance issue. It is not
necessary to flush the TLBs of all protection domains, only those of
the protection domains associated with IOVAs on the flush queue.

Create a separate queue that tracks the protection domains associated
with the IOVAs on the flush queue. This new queue limits TLB flushing
to the protection domains that actually require it.

Reviewed-by: Arindam Nath <[email protected]>
Signed-off-by: Tom Lendacky <[email protected]>
---
 drivers/iommu/amd_iommu.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 856103b..a5e77f0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -103,7 +103,18 @@ struct flush_queue {
         struct flush_queue_entry *entries;
 };
 
+struct flush_pd_queue_entry {
+        struct protection_domain *pd;
+};
+
+struct flush_pd_queue {
+        /* No lock needed, protected by flush_queue lock */
+        unsigned next;
+        struct flush_pd_queue_entry *entries;
+};
+
 static DEFINE_PER_CPU(struct flush_queue, flush_queue);
+static DEFINE_PER_CPU(struct flush_pd_queue, flush_pd_queue);
 
 static atomic_t queue_timer_on;
 static struct timer_list queue_timer;
@@ -2227,16 +2238,20 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
-static void __queue_flush(struct flush_queue *queue)
+static void __queue_flush(struct flush_queue *queue,
+                          struct flush_pd_queue *pd_queue)
 {
-        struct protection_domain *domain;
         unsigned long flags;
         int idx;
 
         /* First flush TLB of all known domains */
         spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-        list_for_each_entry(domain, &amd_iommu_pd_list, list)
-                domain_flush_tlb(domain);
+        for (idx = 0; idx < pd_queue->next; ++idx) {
+                struct flush_pd_queue_entry *entry;
+
+                entry = pd_queue->entries + idx;
+                domain_flush_tlb(entry->pd);
+        }
         spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
 
         /* Wait until flushes have completed */
@@ -2255,6 +2270,7 @@ static void __queue_flush(struct flush_queue *queue)
                 entry->dma_dom = NULL;
         }
 
+        pd_queue->next = 0;
         queue->next = 0;
 }
 
@@ -2263,13 +2279,15 @@ static void queue_flush_all(void)
         int cpu;
 
         for_each_possible_cpu(cpu) {
+                struct flush_pd_queue *pd_queue;
                 struct flush_queue *queue;
                 unsigned long flags;
 
                 queue = per_cpu_ptr(&flush_queue, cpu);
+                pd_queue = per_cpu_ptr(&flush_pd_queue, cpu);
                 spin_lock_irqsave(&queue->lock, flags);
                 if (queue->next > 0)
-                        __queue_flush(queue);
+                        __queue_flush(queue, pd_queue);
                 spin_unlock_irqrestore(&queue->lock, flags);
         }
 }
@@ -2283,6 +2301,8 @@ static void queue_flush_timeout(unsigned long unsused)
 static void queue_add(struct dma_ops_domain *dma_dom,
                       unsigned long address, unsigned long pages)
 {
+        struct flush_pd_queue_entry *pd_entry;
+        struct flush_pd_queue *pd_queue;
         struct flush_queue_entry *entry;
         struct flush_queue *queue;
         unsigned long flags;
@@ -2292,10 +2312,22 @@ static void queue_add(struct dma_ops_domain *dma_dom,
         address >>= PAGE_SHIFT;
 
         queue = get_cpu_ptr(&flush_queue);
+        pd_queue = get_cpu_ptr(&flush_pd_queue);
         spin_lock_irqsave(&queue->lock, flags);
 
         if (queue->next == FLUSH_QUEUE_SIZE)
-                __queue_flush(queue);
+                __queue_flush(queue, pd_queue);
+
+        for (idx = 0; idx < pd_queue->next; ++idx) {
+                pd_entry = pd_queue->entries + idx;
+                if (pd_entry->pd == &dma_dom->domain)
+                        break;
+        }
+        if (idx == pd_queue->next) {
+                /* New protection domain, add it to the list */
+                pd_entry = pd_queue->entries + pd_queue->next++;
+                pd_entry->pd = &dma_dom->domain;
+        }
 
         idx = queue->next++;
         entry = queue->entries + idx;
@@ -2309,6 +2341,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
         if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
                 mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
 
+        put_cpu_ptr(&flush_pd_queue);
         put_cpu_ptr(&flush_queue);
 }
 
@@ -2810,6 +2843,8 @@ int __init amd_iommu_init_api(void)
                 return ret;
 
         for_each_possible_cpu(cpu) {
+                struct flush_pd_queue *pd_queue = per_cpu_ptr(&flush_pd_queue,
+                                                              cpu);
                 struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
 
                 queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
@@ -2819,6 +2854,12 @@ int __init amd_iommu_init_api(void)
                         goto out_put_iova;
 
                 spin_lock_init(&queue->lock);
+
+                pd_queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
+                                            sizeof(*pd_queue->entries),
+                                            GFP_KERNEL);
+                if (!pd_queue->entries)
+                        goto out_put_iova;
         }
 
         err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
@@ -2836,9 +2877,12 @@ int __init amd_iommu_init_api(void)
 
 out_put_iova:
         for_each_possible_cpu(cpu) {
+                struct flush_pd_queue *pd_queue = per_cpu_ptr(&flush_pd_queue,
+                                                              cpu);
                 struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
 
                 kfree(queue->entries);
+                kfree(pd_queue->entries);
         }
 
         return -ENOMEM;
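
For readers who prefer not to trace the bookkeeping through the diff, here is a
minimal, single-threaded userspace sketch of the idea: keep a small parallel
list of the unique protection domains referenced by the flush queue, and flush
only those domains when the queue drains. FLUSH_QUEUE_SIZE and
domain_flush_tlb() mirror names used in the patch; struct domain, struct
flush_entry, and the simplified queue_add()/queue_flush() helpers are invented
for the illustration, and the per-CPU queues, locking, and flush timer are
omitted.

/*
 * Standalone illustration (not kernel code): track only the domains
 * that actually have entries on the flush queue, and flush just those.
 */
#include <stdio.h>
#include <stddef.h>

#define FLUSH_QUEUE_SIZE 8

struct domain {
        int id;
};

struct flush_entry {
        unsigned long address;
        unsigned long pages;
        struct domain *dom;
};

static struct flush_entry flush_queue[FLUSH_QUEUE_SIZE];
static size_t flush_next;

/* Parallel list of the unique domains referenced by flush_queue. */
static struct domain *pd_queue[FLUSH_QUEUE_SIZE];
static size_t pd_next;

static void domain_flush_tlb(struct domain *dom)
{
        /* Stand-in for the real (expensive) per-domain TLB flush. */
        printf("flushing TLB of domain %d\n", dom->id);
}

static void queue_flush(void)
{
        size_t i;

        /* Flush only the domains that have queued entries. */
        for (i = 0; i < pd_next; i++)
                domain_flush_tlb(pd_queue[i]);

        pd_next = 0;
        flush_next = 0;
}

static void queue_add(struct domain *dom, unsigned long address,
                      unsigned long pages)
{
        size_t i;

        if (flush_next == FLUSH_QUEUE_SIZE)
                queue_flush();

        /* Remember the domain if it is not already on the pd list. */
        for (i = 0; i < pd_next; i++)
                if (pd_queue[i] == dom)
                        break;
        if (i == pd_next)
                pd_queue[pd_next++] = dom;

        flush_queue[flush_next].address = address;
        flush_queue[flush_next].pages = pages;
        flush_queue[flush_next].dom = dom;
        flush_next++;
}

int main(void)
{
        struct domain a = { .id = 1 }, b = { .id = 2 };
        unsigned long addr;

        /* Queue unmaps for two domains, then force a flush. */
        for (addr = 0; addr < 6; addr++)
                queue_add(addr & 1 ? &a : &b, addr << 12, 1);
        queue_flush();

        return 0;
}

The linear scan in queue_add() is bounded by FLUSH_QUEUE_SIZE entries, so the
extra bookkeeping stays cheap compared with the per-domain TLB flushes and
completion waits it avoids.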
