On top of the tree at git.infradead.org/users/dwmw2/linux-svm.git
(http:// or git://).

For userspace addresses, we use the MMU notifiers and flush the IOTLB
as appropriate.

However, we need to do it for kernel addresses too — which basically
means adding a hook to tlb_flush_kernel_range(). Does this look
reasonable? I was trying to avoid it and insist on supporting addresses
within the kernel's static mapping only. But it doesn't look like
that's a reasonable thing to require.

Signed-off-by: David Woodhouse <[email protected]>
---
 arch/x86/mm/tlb.c           |    2 ++
 drivers/iommu/intel-svm.c   |   37 ++++++++++++++++++++++++++++++++++---
 include/linux/intel-iommu.h |    6 +++++-
 include/linux/intel-svm.h   |   13 +++++--------
 4 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 0a48ccf..61d9533 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -44,14 +44,11 @@ struct svm_dev_ops {
 
 /*
  * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
+ * for access to kernel addresses. IOTLB flushes are performed as required
+ * by means of a hook from flush_tlb_kernel_range(). This flag is mutually
+ * exclusive with the SVM_FLAG_PRIVATE_PASID flag — there can be only one
+ * PASID used for kernel mode, to keep the performance implications of the
+ * IOTLB flush hook relatively sane.
  */
 #define SVM_FLAG_SUPERVISOR_MODE       (1<<1)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0..40ebe83 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -266,6 +267,7 @@ static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
+       intel_iommu_flush_kernel_pasid(start, end);
 
        /* Balance as user space task's flush, a bit conservative */
        if (end == TLB_FLUSH_ALL ||
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a584df0..f8ca3c1 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
 #include <linux/pci-ats.h>
 #include <linux/dmar.h>
 #include <linux/interrupt.h>
+#include <asm/tlbflush.h>
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 
@@ -264,6 +265,26 @@ static const struct mmu_notifier_ops intel_mmuops = {
        .invalidate_range = intel_invalidate_range,
 };
 
+void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       unsigned long pages;
+
+       if (end == TLB_FLUSH_ALL)
+               pages = end;
+       else
+               pages = (end - start) >> VTD_PAGE_SHIFT;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               struct intel_svm *svm = rcu_dereference(iommu->kernel_svm);
+               if (svm)
+                       intel_flush_svm_range(svm, start, pages, 0, 1);
+       }
+       rcu_read_unlock();
+}
+
 static DEFINE_MUTEX(pasid_mutex);
 
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
@@ -286,6 +307,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
                pasid_max = 1 << 20;
 
        if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
+               if (flags & SVM_FLAG_PRIVATE_PASID)
+                       return -EINVAL;
                if (!ecap_srs(iommu->ecap))
                        return -EINVAL;
        } else if (pasid) {
@@ -294,7 +317,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
        }
 
        mutex_lock(&pasid_mutex);
-       if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+       if (flags & SVM_FLAG_SUPERVISOR_MODE)
+               svm = iommu->kernel_svm;
+       else if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
                int i;
 
                idr_for_each_entry(&iommu->pasid_idr, svm, i) {
@@ -378,8 +403,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
                        }
                        iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
                        mm = NULL;
-               } else
+               } else {
+                       iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+                       rcu_assign_pointer(iommu->kernel_svm, svm);
+               }
                wmb();
        }
        list_add_rcu(&sdev->list, &svm->devs);
@@ -432,8 +459,12 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
                                        mmu_notifier_unregister(&svm->notifier, svm->mm);
 
                                        idr_remove(&svm->iommu->pasid_idr, svm->pasid);
-                                       if (svm->mm)
+                                       if (svm->mm) {
                                                mmput(svm->mm);
+                                       } else {
+                                               
rcu_assign_pointer(iommu->kernel_svm, NULL);
+                                               synchronize_rcu();
+                                       }
                                        /* We mandate that no page faults may be outstanding
                                         * for the PASID when intel_svm_unbind_mm() is called.
                                         * If that is not obeyed, subtle errors will happen.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 821273c..169bc84 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -391,6 +391,7 @@ enum {
 struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
+struct intel_svm;
 
 struct intel_iommu {
        void __iomem    *reg; /* Pointer to hardware regs, virtual addr */
@@ -426,6 +427,7 @@ struct intel_iommu {
        struct page_req_dsc *prq;
        unsigned char prq_name[16];    /* Name for PRQ interrupt */
        struct idr pasid_idr;
+       struct intel_svm __rcu *kernel_svm;
 #endif
        struct q_inval  *qi;            /* Queued invalidation info */
        u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -496,8 +498,10 @@ struct intel_svm {
 
 extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
 extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
+extern void intel_iommu_flush_kernel_pasid(unsigned long start, unsigned long end);
+#else
+#define intel_iommu_flush_kernel_pasid(start, end) do { ; } while(0)
 #endif
-
 extern const struct attribute_group *intel_iommu_groups[];
 
 #endif

-- 
David Woodhouse                            Open Source Technology Centre
[email protected]                              Intel Corporation

Attachment: smime.p7s
Description: S/MIME cryptographic signature

_______________________________________________
iommu mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to