Create library routines to share the ARM SMMU programming and the
common IOMMU API implementation across drivers for ARM SMMU v1 and v2
based implementations.

Signed-off-by: Krishna Reddy <vdu...@nvidia.com>
---
 drivers/iommu/Makefile       |    1 +
 drivers/iommu/lib-arm-smmu.c | 1671 ++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/lib-arm-smmu.h |  161 ++++
 3 files changed, 1833 insertions(+)
 create mode 100644 drivers/iommu/lib-arm-smmu.c
 create mode 100644 drivers/iommu/lib-arm-smmu.h
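
For reviewers, a rough sketch of how a vendor driver is expected to sit
on top of these library routines (purely illustrative, not part of the
patch itself): the arm_smmu_* symbols below are the entry points exported
via lib-arm-smmu.h, while everything prefixed foo_smmu_ is a made-up
placeholder for the vendor glue.

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/iommu.h>

#include "lib-arm-smmu.h"

static struct iommu_ops foo_smmu_ops;

static struct iommu_domain *foo_smmu_domain_alloc(unsigned int type)
{
        /* using_legacy_binding would come from the vendor probe code */
        return arm_smmu_domain_alloc_common(type, false);
}

static int foo_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
        return arm_smmu_attach_dev_common(domain, dev, &foo_smmu_ops);
}

static void foo_smmu_remove_device(struct device *dev)
{
        arm_smmu_remove_device_common(dev, &foo_smmu_ops);
}

static struct iommu_ops foo_smmu_ops = {
        .capable                = arm_smmu_capable,
        .domain_alloc           = foo_smmu_domain_alloc,
        .domain_free            = arm_smmu_domain_free,
        .attach_dev             = foo_smmu_attach_dev,
        .map                    = arm_smmu_map,
        .unmap                  = arm_smmu_unmap,
        .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
        .iotlb_sync             = arm_smmu_iotlb_sync,
        .iova_to_phys           = arm_smmu_iova_to_phys,
        /* .add_device would wrap arm_smmu_add_device_common(dev, smmu)
         * once the vendor code has looked up its arm_smmu_device */
        .remove_device          = foo_smmu_remove_device,
        .device_group           = arm_smmu_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
        .of_xlate               = arm_smmu_of_xlate,
        .get_resv_regions       = arm_smmu_get_resv_regions,
        .put_resv_regions       = arm_smmu_put_resv_regions,
        .pgsize_bitmap          = -1UL, /* restricted by the io-pgtable format */
};

At probe time the expected ordering follows what arm-smmu.c does today:
arm_smmu_device_cfg_probe_common() sizes the stream-map and context-bank
state from the ID registers, arm_smmu_device_reset() programs the reset
values and sCR0, and only then arm_smmu_test_smr_masks() probes the
usable SMR fields, since their width depends on sCR0.EXIDENABLE. Again
with made-up foo_* names:

static atomic_t foo_context_count = ATOMIC_INIT(0);

static int foo_smmu_hw_init(struct arm_smmu_device *smmu, int force_stage)
{
        int ret;

        ret = arm_smmu_device_cfg_probe_common(smmu, &foo_smmu_ops,
                                               &foo_context_count, force_stage);
        if (ret)
                return ret;

        arm_smmu_device_reset(smmu);
        arm_smmu_test_smr_masks(smmu);
        return 0;
}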

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index a158a68..ea87cae 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
+obj-$(CONFIG_ARM_SMMU) += lib-arm-smmu.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
diff --git a/drivers/iommu/lib-arm-smmu.c b/drivers/iommu/lib-arm-smmu.c
new file mode 100644
index 0000000..6aba5db
--- /dev/null
+++ b/drivers/iommu/lib-arm-smmu.c
@@ -0,0 +1,1671 @@
+/*
+ * Copyright (c) 2018, NVIDIA Corporation
+ * Author: Krishna Reddy <vdu...@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Library for ARM architected v1 and v2 SMMU implementations.
+ * This library is created by reusing code from arm-smmu.c, which is
+ * authored by Will Deacon.
+ */
+
+#define pr_fmt(fmt) "lib-arm-smmu: " fmt
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/fsl/mc.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "io-pgtable.h"
+#include "arm-smmu-regs.h"
+#include "lib-arm-smmu.h"
+
+#define ARM_MMU500_ACTLR_CPRE          (1 << 1)
+
+#define ARM_MMU500_ACR_CACHE_LOCK      (1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN     (1 << 10)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN    (1 << 8)
+
+#define TLB_LOOP_TIMEOUT               1000000 /* 1s! */
+#define TLB_SPIN_COUNT                 10
+
+/* SMMU global address space */
+#define ARM_SMMU_GR0(smmu)             ((smmu)->base)
+#define ARM_SMMU_GR1(smmu)             ((smmu)->base + (1 << (smmu)->pgshift))
+
+/*
+ * SMMU global address space with conditional offset to access secure
+ * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
+ * nsGFSYNR0: 0x450)
+ */
+#define ARM_SMMU_GR0_NS(smmu)                                          \
+       ((smmu)->base +                                                 \
+               ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
+                       ? 0x400 : 0))
+
+/*
+ * Some 64-bit registers only make sense to write atomically, but in such
+ * cases all the data relevant to AArch32 formats lies within the lower word,
+ * therefore this actually makes more sense than it might first appear.
+ */
+#ifdef CONFIG_64BIT
+#define smmu_write_atomic_lq           writeq_relaxed
+#else
+#define smmu_write_atomic_lq           writel_relaxed
+#endif
+
+/* Translation context bank */
+#define ARM_SMMU_CB(smmu, n)   ((smmu)->cb_base + ((n) << (smmu)->pgshift))
+
+#define MSI_IOVA_BASE                  0x8000000
+#define MSI_IOVA_LENGTH                        0x100000
+
+#define s2cr_init_val (struct arm_smmu_s2cr){                             \
+       .type = smmu->disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
+}
+
+struct arm_smmu_master_cfg {
+       struct arm_smmu_device          *smmu;
+       s16                             smendx[];
+};
+#define INVALID_SMENDX                 -1
+#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
+#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
+#define fwspec_smendx(fw, i) \
+       (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
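+/* Iterate over a master's IDs and their stream map entry indices */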
+#define for_each_cfg_sme(fw, i, idx) \
+       for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
+
+enum arm_smmu_context_fmt {
+       ARM_SMMU_CTX_FMT_NONE,
+       ARM_SMMU_CTX_FMT_AARCH64,
+       ARM_SMMU_CTX_FMT_AARCH32_L,
+       ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
+struct arm_smmu_cfg {
+       u8                              cbndx;
+       u8                              irptndx;
+       union {
+               u16                     asid;
+               u16                     vmid;
+       };
+       u32                             cbar;
+       enum arm_smmu_context_fmt       fmt;
+};
+#define INVALID_IRPTNDX                        0xff
+
+enum arm_smmu_domain_stage {
+       ARM_SMMU_DOMAIN_S1 = 0,
+       ARM_SMMU_DOMAIN_S2,
+       ARM_SMMU_DOMAIN_NESTED,
+       ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_domain {
+       struct arm_smmu_device          *smmu;
+       struct io_pgtable_ops           *pgtbl_ops;
+       const struct iommu_gather_ops   *tlb_ops;
+       struct arm_smmu_cfg             cfg;
+       enum arm_smmu_domain_stage      stage;
+       bool                            non_strict;
+       struct mutex                    init_mutex; /* Protects smmu pointer */
+       /* Serialises ATS1* ops and TLB syncs */
+       spinlock_t                      cb_lock;
+       struct iommu_domain             domain;
+};
+
+static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
+{
+       return container_of(dom, struct arm_smmu_domain, domain);
+}
+
+static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
+{
+       int idx;
+
+       do {
+               idx = find_next_zero_bit(map, end, start);
+               if (idx == end)
+                       return -ENOSPC;
+       } while (test_and_set_bit(idx, map));
+
+       return idx;
+}
+
+static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
+{
+       clear_bit(idx, map);
+}
+
+/* Wait for any pending TLB invalidations to complete */
+static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
+                               void __iomem *sync, void __iomem *status)
+{
+       unsigned int spin_cnt, delay;
+
+       writel_relaxed(0, sync);
+       for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+               for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+                       if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
+                               return;
+                       cpu_relax();
+               }
+               udelay(delay);
+       }
+       dev_err_ratelimited(smmu->dev,
+                           "TLB sync timed out -- SMMU may be deadlocked\n");
+}
+
+static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
+{
+       void __iomem *base = ARM_SMMU_GR0(smmu);
+       unsigned long flags;
+
+       spin_lock_irqsave(&smmu->global_sync_lock, flags);
+       __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
+                           base + ARM_SMMU_GR0_sTLBGSTATUS);
+       spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
+}
+
+static void arm_smmu_tlb_sync_context(void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
+       unsigned long flags;
+
+       spin_lock_irqsave(&smmu_domain->cb_lock, flags);
+       __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
+                           base + ARM_SMMU_CB_TLBSTATUS);
+       spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+}
+
+static void arm_smmu_tlb_sync_vmid(void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+
+       arm_smmu_tlb_sync_global(smmu_domain->smmu);
+}
+
+static void arm_smmu_tlb_inv_context_s1(void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+       void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+
+       /*
+        * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+        * cleared by the current CPU are visible to the SMMU before the TLBI.
+        */
+       writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+       arm_smmu_tlb_sync_context(cookie);
+}
+
+static void arm_smmu_tlb_inv_context_s2(void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       void __iomem *base = ARM_SMMU_GR0(smmu);
+
+       /* NOTE: see above */
+       writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+       arm_smmu_tlb_sync_global(smmu);
+}
+
+static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+                               size_t granule, bool leaf, void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+       bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+       void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+
+       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               wmb();
+
+       if (stage1) {
+               reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
+
+               if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
+                       iova &= ~12UL;
+                       iova |= cfg->asid;
+                       do {
+                               writel_relaxed(iova, reg);
+                               iova += granule;
+                       } while (size -= granule);
+               } else {
+                       iova >>= 12;
+                       iova |= (u64)cfg->asid << 48;
+                       do {
+                               writeq_relaxed(iova, reg);
+                               iova += granule >> 12;
+                       } while (size -= granule);
+               }
+       } else {
+               reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
+                             ARM_SMMU_CB_S2_TLBIIPAS2;
+               iova >>= 12;
+               do {
+                       smmu_write_atomic_lq(iova, reg);
+                       iova += granule >> 12;
+               } while (size -= granule);
+       }
+}
+
+/*
+ * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
+ * almost negligible, but the benefit of getting the first one in as far ahead
+ * of the sync as possible is significant, hence we don't just make this a
+ * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
+ */
+static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
+                               size_t granule, bool leaf, void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+
+       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               wmb();
+
+       writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+}
+
+static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
+       .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
+       .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
+       .tlb_sync       = arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
+       .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
+       .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
+       .tlb_sync       = arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
+       .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
+       .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
+       .tlb_sync       = arm_smmu_tlb_sync_vmid,
+};
+
+irqreturn_t arm_smmu_context_fault(int irq, void *dev)
+{
+       u32 fsr, fsynr;
+       unsigned long iova;
+       struct iommu_domain *domain = dev;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       void __iomem *cb_base;
+
+       cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
+       fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
+
+       if (!(fsr & FSR_FAULT))
+               return IRQ_NONE;
+
+       fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
+       iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+
+       dev_err_ratelimited(smmu->dev,
+       "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+                           fsr, iova, fsynr, cfg->cbndx);
+
+       writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+       return IRQ_HANDLED;
+}
+
+irqreturn_t arm_smmu_global_fault(int irq, void *dev)
+{
+       u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
+       struct arm_smmu_device *smmu = dev;
+       void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
+
+       gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
+       gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
+       gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
+       gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
+
+       if (!gfsr)
+               return IRQ_NONE;
+
+       dev_err_ratelimited(smmu->dev,
+               "Unexpected global fault, this could be serious\n");
+       dev_err_ratelimited(smmu->dev,
+               "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+               gfsr, gfsynr0, gfsynr1, gfsynr2);
+
+       writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+       return IRQ_HANDLED;
+}
+
+static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
+                                      struct io_pgtable_cfg *pgtbl_cfg)
+{
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+       struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+       bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+
+       cb->cfg = cfg;
+
+       /* TTBCR */
+       if (stage1) {
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+                       cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
+               } else {
+                       cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+                       cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
+                       cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
+                       if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+                               cb->tcr[1] |= TTBCR2_AS;
+               }
+       } else {
+               cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+       }
+
+       /* TTBRs */
+       if (stage1) {
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+                       cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
+                       cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
+               } else {
+                       cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+                       cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+                       cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+                       cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+               }
+       } else {
+               cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+       }
+
+       /* MAIRs (stage-1 only) */
+       if (stage1) {
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+                       cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
+                       cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
+               } else {
+                       cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+                       cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+               }
+       }
+}
+
+static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
+{
+       u32 reg;
+       bool stage1;
+       struct arm_smmu_cb *cb = &smmu->cbs[idx];
+       struct arm_smmu_cfg *cfg = cb->cfg;
+       void __iomem *cb_base, *gr1_base;
+
+       cb_base = ARM_SMMU_CB(smmu, idx);
+
+       /* Unassigned context banks only need disabling */
+       if (!cfg) {
+               writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
+               return;
+       }
+
+       gr1_base = ARM_SMMU_GR1(smmu);
+       stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+
+       /* CBA2R */
+       if (smmu->version > ARM_SMMU_V1) {
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+                       reg = CBA2R_RW64_64BIT;
+               else
+                       reg = CBA2R_RW64_32BIT;
+               /* 16-bit VMIDs live in CBA2R */
+               if (smmu->features & ARM_SMMU_FEAT_VMID16)
+                       reg |= cfg->vmid << CBA2R_VMID_SHIFT;
+
+               writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
+       }
+
+       /* CBAR */
+       reg = cfg->cbar;
+       if (smmu->version < ARM_SMMU_V2)
+               reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
+
+       /*
+        * Use the weakest shareability/memory types, so they are
+        * overridden by the ttbcr/pte.
+        */
+       if (stage1) {
+               reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
+                       (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
+       } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
+               /* 8-bit VMIDs live in CBAR */
+               reg |= cfg->vmid << CBAR_VMID_SHIFT;
+       }
+       writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
+
+       /*
+        * TTBCR
+        * We must write this before the TTBRs, since it determines the
+        * access behaviour of some fields (in particular, ASID[15:8]).
+        */
+       if (stage1 && smmu->version > ARM_SMMU_V1)
+               writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
+       writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
+
+       /* TTBRs */
+       if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+               writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
+               writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
+               writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
+       } else {
+               writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
+               if (stage1)
+                       writeq_relaxed(cb->ttbr[1],
+                                      cb_base + ARM_SMMU_CB_TTBR1);
+       }
+
+       /* MAIRs (stage-1 only) */
+       if (stage1) {
+               writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
+               writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
+       }
+
+       /* SCTLR */
+       reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
+       if (stage1)
+               reg |= SCTLR_S1_ASIDPNE;
+       if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+               reg |= SCTLR_E;
+
+       writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
+}
+
+static int arm_smmu_init_domain_context(struct iommu_domain *domain,
+                                       struct arm_smmu_device *smmu)
+{
+       int irq, start, ret = 0;
+       unsigned long ias, oas;
+       struct io_pgtable_ops *pgtbl_ops;
+       struct io_pgtable_cfg pgtbl_cfg;
+       enum io_pgtable_fmt fmt;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+
+       mutex_lock(&smmu_domain->init_mutex);
+       if (smmu_domain->smmu)
+               goto out_unlock;
+
+       if (domain->type == IOMMU_DOMAIN_IDENTITY) {
+               smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
+               smmu_domain->smmu = smmu;
+               goto out_unlock;
+       }
+
+       /*
+        * Mapping the requested stage onto what we support is surprisingly
+        * complicated, mainly because the spec allows S1+S2 SMMUs without
+        * support for nested translation. That means we end up with the
+        * following table:
+        *
+        * Requested        Supported        Actual
+        *     S1               N              S1
+        *     S1             S1+S2            S1
+        *     S1               S2             S2
+        *     S1               S1             S1
+        *     N                N              N
+        *     N              S1+S2            S2
+        *     N                S2             S2
+        *     N                S1             S1
+        *
+        * Note that you can't actually request stage-2 mappings.
+        */
+       if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+               smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+       if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+               smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+
+       /*
+        * Choosing a suitable context format is even more fiddly. Until we
+        * grow some way for the caller to express a preference, and/or move
+        * the decision into the io-pgtable code where it arguably belongs,
+        * just aim for the closest thing to the rest of the system, and hope
+        * that the hardware isn't esoteric enough that we can't assume AArch64
+        * support to be a superset of AArch32 support...
+        */
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
+               cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
+       if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
+           !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
+           (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
+           (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+               cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
+       if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
+           (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
+                              ARM_SMMU_FEAT_FMT_AARCH64_16K |
+                              ARM_SMMU_FEAT_FMT_AARCH64_4K)))
+               cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
+
+       if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       switch (smmu_domain->stage) {
+       case ARM_SMMU_DOMAIN_S1:
+               cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
+               start = smmu->num_s2_context_banks;
+               ias = smmu->va_size;
+               oas = smmu->ipa_size;
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
+                       fmt = ARM_64_LPAE_S1;
+               } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
+                       fmt = ARM_32_LPAE_S1;
+                       ias = min(ias, 32UL);
+                       oas = min(oas, 40UL);
+               } else {
+                       fmt = ARM_V7S;
+                       ias = min(ias, 32UL);
+                       oas = min(oas, 32UL);
+               }
+               smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+               break;
+       case ARM_SMMU_DOMAIN_NESTED:
+               /*
+                * We will likely want to change this if/when KVM gets
+                * involved.
+                */
+       case ARM_SMMU_DOMAIN_S2:
+               cfg->cbar = CBAR_TYPE_S2_TRANS;
+               start = 0;
+               ias = smmu->ipa_size;
+               oas = smmu->pa_size;
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
+                       fmt = ARM_64_LPAE_S2;
+               } else {
+                       fmt = ARM_32_LPAE_S2;
+                       ias = min(ias, 40UL);
+                       oas = min(oas, 40UL);
+               }
+               if (smmu->version == ARM_SMMU_V2)
+                       smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+               else
+                       smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+               break;
+       default:
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+       ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+                                     smmu->num_context_banks);
+       if (ret < 0)
+               goto out_unlock;
+
+       cfg->cbndx = ret;
+       if (smmu->version < ARM_SMMU_V2) {
+               cfg->irptndx = atomic_inc_return(&smmu->irptndx);
+               cfg->irptndx %= smmu->num_context_irqs;
+       } else {
+               cfg->irptndx = cfg->cbndx;
+       }
+
+       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+               cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+       else
+               cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+
+       pgtbl_cfg = (struct io_pgtable_cfg) {
+               .pgsize_bitmap  = smmu->pgsize_bitmap,
+               .ias            = ias,
+               .oas            = oas,
+               .tlb            = smmu_domain->tlb_ops,
+               .iommu_dev      = smmu->dev,
+       };
+
+       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
+       if (smmu_domain->non_strict)
+               pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
+       smmu_domain->smmu = smmu;
+       pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
+       if (!pgtbl_ops) {
+               ret = -ENOMEM;
+               goto out_clear_smmu;
+       }
+
+       /* Update the domain's page sizes to reflect the page table format */
+       domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+       domain->geometry.aperture_end = (1UL << ias) - 1;
+       domain->geometry.force_aperture = true;
+
+       /* Initialise the context bank with our page table cfg */
+       arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
+       arm_smmu_write_context_bank(smmu, cfg->cbndx);
+
+       /*
+        * Request context fault interrupt. Do this last to avoid the
+        * handler seeing a half-initialised domain state.
+        */
+       irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+       ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
+                              IRQF_SHARED, "arm-smmu-context-fault", domain);
+       if (ret < 0) {
+               dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
+                       cfg->irptndx, irq);
+               cfg->irptndx = INVALID_IRPTNDX;
+       }
+
+       mutex_unlock(&smmu_domain->init_mutex);
+
+       /* Publish page table ops for map/unmap */
+       smmu_domain->pgtbl_ops = pgtbl_ops;
+       return 0;
+
+out_clear_smmu:
+       smmu_domain->smmu = NULL;
+out_unlock:
+       mutex_unlock(&smmu_domain->init_mutex);
+       return ret;
+}
+
+static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+       int irq;
+
+       if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
+               return;
+
+       /*
+        * Disable the context bank and free the page tables before freeing
+        * it.
+        */
+       smmu->cbs[cfg->cbndx].cfg = NULL;
+       arm_smmu_write_context_bank(smmu, cfg->cbndx);
+
+       if (cfg->irptndx != INVALID_IRPTNDX) {
+               irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+               devm_free_irq(smmu->dev, irq, domain);
+       }
+
+       free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+       __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+}
+
+struct iommu_domain *arm_smmu_domain_alloc_common(unsigned int type,
+                                                 bool using_legacy_binding)
+{
+       struct arm_smmu_domain *smmu_domain;
+
+       if (type != IOMMU_DOMAIN_UNMANAGED &&
+           type != IOMMU_DOMAIN_DMA &&
+           type != IOMMU_DOMAIN_IDENTITY)
+               return NULL;
+       /*
+        * Allocate the domain and initialise some of its data structures.
+        * We can't really do anything meaningful until we've added a
+        * master.
+        */
+       smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
+       if (!smmu_domain)
+               return NULL;
+
+       if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
+           iommu_get_dma_cookie(&smmu_domain->domain))) {
+               kfree(smmu_domain);
+               return NULL;
+       }
+
+       mutex_init(&smmu_domain->init_mutex);
+       spin_lock_init(&smmu_domain->cb_lock);
+
+       return &smmu_domain->domain;
+}
+
+void arm_smmu_domain_free(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       /*
+        * Free the domain resources. We assume that all devices have
+        * already been detached.
+        */
+       iommu_put_dma_cookie(domain);
+       arm_smmu_destroy_domain_context(domain);
+       kfree(smmu_domain);
+}
+
+static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
+{
+       struct arm_smmu_smr *smr = smmu->smrs + idx;
+       u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
+
+       if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
+               reg |= SMR_VALID;
+       writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
+}
+
+static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
+{
+       struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
+       u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
+                 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
+                 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
+
+       if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
+           smmu->smrs[idx].valid)
+               reg |= S2CR_EXIDVALID;
+       writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
+}
+
+static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
+{
+       arm_smmu_write_s2cr(smmu, idx);
+       if (smmu->smrs)
+               arm_smmu_write_smr(smmu, idx);
+}
+
+/*
+ * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
+ * should be called after sCR0 is written.
+ */
+void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
+{
+       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       u32 smr;
+
+       if (!smmu->smrs)
+               return;
+
+       /*
+        * SMR.ID bits may not be preserved if the corresponding MASK
+        * bits are set, so check each one separately. We can reject
+        * masters later if they try to claim IDs outside these masks.
+        */
+       smr = smmu->streamid_mask << SMR_ID_SHIFT;
+       writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+       smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+       smmu->streamid_mask = smr >> SMR_ID_SHIFT;
+
+       smr = smmu->streamid_mask << SMR_MASK_SHIFT;
+       writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+       smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+       smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+}
+
+static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
+{
+       struct arm_smmu_smr *smrs = smmu->smrs;
+       int i, free_idx = -ENOSPC;
+
+       /* Stream indexing is blissfully easy */
+       if (!smrs)
+               return id;
+
+       /* Validating SMRs is... less so */
+       for (i = 0; i < smmu->num_mapping_groups; ++i) {
+               if (!smrs[i].valid) {
+                       /*
+                        * Note the first free entry we come across, which
+                        * we'll claim in the end if nothing else matches.
+                        */
+                       if (free_idx < 0)
+                               free_idx = i;
+                       continue;
+               }
+               /*
+                * If the new entry is _entirely_ matched by an existing entry,
+                * then reuse that, with the guarantee that there also cannot
+                * be any subsequent conflicting entries. In normal use we'd
+                * expect simply identical entries for this case, but there's
+                * no harm in accommodating the generalisation.
+                */
+               if ((mask & smrs[i].mask) == mask &&
+                   !((id ^ smrs[i].id) & ~smrs[i].mask))
+                       return i;
+               /*
+                * If the new entry has any other overlap with an existing one,
+                * though, then there always exists at least one stream ID
+                * which would cause a conflict, and we can't allow that risk.
+                */
+               if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
+                       return -EINVAL;
+       }
+
+       return free_idx;
+}
+
+static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
+{
+       if (--smmu->s2crs[idx].count)
+               return false;
+
+       smmu->s2crs[idx] = s2cr_init_val;
+       if (smmu->smrs)
+               smmu->smrs[idx].valid = false;
+
+       return true;
+}
+
+static int arm_smmu_master_alloc_smes(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+       struct arm_smmu_device *smmu = cfg->smmu;
+       struct arm_smmu_smr *smrs = smmu->smrs;
+       struct iommu_group *group;
+       int i, idx, ret;
+
+       mutex_lock(&smmu->stream_map_mutex);
+       /* Figure out a viable stream map entry allocation */
+       for_each_cfg_sme(fwspec, i, idx) {
+               u16 sid = fwspec->ids[i];
+               u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+
+               if (idx != INVALID_SMENDX) {
+                       ret = -EEXIST;
+                       goto out_err;
+               }
+
+               ret = arm_smmu_find_sme(smmu, sid, mask);
+               if (ret < 0)
+                       goto out_err;
+
+               idx = ret;
+               if (smrs && smmu->s2crs[idx].count == 0) {
+                       smrs[idx].id = sid;
+                       smrs[idx].mask = mask;
+                       smrs[idx].valid = true;
+               }
+               smmu->s2crs[idx].count++;
+               cfg->smendx[i] = (s16)idx;
+       }
+
+       group = iommu_group_get_for_dev(dev);
+       if (!group)
+               group = ERR_PTR(-ENOMEM);
+       if (IS_ERR(group)) {
+               ret = PTR_ERR(group);
+               goto out_err;
+       }
+       iommu_group_put(group);
+
+       /* It worked! Now, poke the actual hardware */
+       for_each_cfg_sme(fwspec, i, idx) {
+               arm_smmu_write_sme(smmu, idx);
+               smmu->s2crs[idx].group = group;
+       }
+
+       mutex_unlock(&smmu->stream_map_mutex);
+       return 0;
+
+out_err:
+       while (i--) {
+               arm_smmu_free_sme(smmu, cfg->smendx[i]);
+               cfg->smendx[i] = INVALID_SMENDX;
+       }
+       mutex_unlock(&smmu->stream_map_mutex);
+       return ret;
+}
+
+static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
+{
+       struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+       struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+       int i, idx;
+
+       mutex_lock(&smmu->stream_map_mutex);
+       for_each_cfg_sme(fwspec, i, idx) {
+               if (arm_smmu_free_sme(smmu, idx))
+                       arm_smmu_write_sme(smmu, idx);
+               cfg->smendx[i] = INVALID_SMENDX;
+       }
+       mutex_unlock(&smmu->stream_map_mutex);
+}
+
+static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
+                                     struct iommu_fwspec *fwspec)
+{
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       struct arm_smmu_s2cr *s2cr = smmu->s2crs;
+       u8 cbndx = smmu_domain->cfg.cbndx;
+       enum arm_smmu_s2cr_type type;
+       int i, idx;
+
+       if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
+               type = S2CR_TYPE_BYPASS;
+       else
+               type = S2CR_TYPE_TRANS;
+
+       for_each_cfg_sme(fwspec, i, idx) {
+               if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
+                       continue;
+
+               s2cr[idx].type = type;
+               s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
+               s2cr[idx].cbndx = cbndx;
+               arm_smmu_write_s2cr(smmu, idx);
+       }
+       return 0;
+}
+
+int arm_smmu_attach_dev_common(struct iommu_domain *domain,
+       struct device *dev, struct iommu_ops *ops)
+{
+       int ret;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_device *smmu;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       if (!fwspec || fwspec->ops != ops) {
+               dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
+               return -ENXIO;
+       }
+
+       /*
+        * FIXME: The arch/arm DMA API code tries to attach devices to its own
+        * domains between of_xlate() and add_device() - we have no way to cope
+        * with that, so until ARM gets converted to rely on groups and default
+        * domains, just say no (but more politely than by dereferencing NULL).
+        * This should be at least a WARN_ON once that's sorted.
+        */
+       if (!fwspec->iommu_priv)
+               return -ENODEV;
+
+       smmu = fwspec_smmu(fwspec);
+       /* Ensure that the domain is finalised */
+       ret = arm_smmu_init_domain_context(domain, smmu);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Sanity check the domain. We don't support domains across
+        * different SMMUs.
+        */
+       if (smmu_domain->smmu != smmu) {
+               dev_err(dev,
+                       "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
+                       dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
+               return -EINVAL;
+       }
+
+       /* Looks ok, so add the device to the domain */
+       return arm_smmu_domain_add_master(smmu_domain, fwspec);
+}
+
+int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
+                phys_addr_t paddr, size_t size, int prot)
+{
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+
+       if (!ops)
+               return -ENODEV;
+
+       return ops->map(ops, iova, paddr, size, prot);
+}
+
+size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+                     size_t size)
+{
+       struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+
+       if (!ops)
+               return 0;
+
+       return ops->unmap(ops, iova, size);
+}
+
+void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       if (smmu_domain->tlb_ops)
+               smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
+void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       if (smmu_domain->tlb_ops)
+               smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+}
+
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+                                             dma_addr_t iova)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+       struct device *dev = smmu->dev;
+       void __iomem *cb_base;
+       u32 tmp;
+       u64 phys;
+       unsigned long va, flags;
+
+       cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
+
+       spin_lock_irqsave(&smmu_domain->cb_lock, flags);
+       /* ATS1 registers can only be written atomically */
+       va = iova & ~0xfffUL;
+       if (smmu->version == ARM_SMMU_V2)
+               smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
+       else /* Register is only 32-bit in v1 */
+               writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+
+       if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+                                     !(tmp & ATSR_ACTIVE), 5, 50)) {
+               spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+               dev_err(dev,
+                       "iova to phys timed out on %pad. Falling back to software table walk.\n",
+                       &iova);
+               return ops->iova_to_phys(ops, iova);
+       }
+
+       phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+       spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+       if (phys & CB_PAR_F) {
+               dev_err(dev, "translation fault!\n");
+               dev_err(dev, "PAR = 0x%llx\n", phys);
+               return 0;
+       }
+
+       return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+}
+
+phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+                                 dma_addr_t iova)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+       if (domain->type == IOMMU_DOMAIN_IDENTITY)
+               return iova;
+
+       if (!ops)
+               return 0;
+
+       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
+                       smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+               return arm_smmu_iova_to_phys_hard(domain, iova);
+
+       return ops->iova_to_phys(ops, iova);
+}
+
+bool arm_smmu_capable(enum iommu_cap cap)
+{
+       switch (cap) {
+       case IOMMU_CAP_CACHE_COHERENCY:
+               /*
+                * Return true here as the SMMU can always send out coherent
+                * requests.
+                */
+               return true;
+       case IOMMU_CAP_NOEXEC:
+               return true;
+       default:
+               return false;
+       }
+}
+
+int arm_smmu_add_device_common(struct device *dev,
+                              struct arm_smmu_device *smmu)
+{
+       struct arm_smmu_master_cfg *cfg;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       int i, ret = -EINVAL;
+
+       for (i = 0; i < fwspec->num_ids; i++) {
+               u16 sid = fwspec->ids[i];
+               u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+
+               if (sid & ~smmu->streamid_mask) {
+                       dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
+                               sid, smmu->streamid_mask);
+                       goto out_free;
+               }
+               if (mask & ~smmu->smr_mask_mask) {
+                       dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
+                               mask, smmu->smr_mask_mask);
+                       goto out_free;
+               }
+       }
+
+       ret = -ENOMEM;
+       cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
+                     GFP_KERNEL);
+       if (!cfg)
+               goto out_free;
+
+       cfg->smmu = smmu;
+       fwspec->iommu_priv = cfg;
+       while (i--)
+               cfg->smendx[i] = INVALID_SMENDX;
+
+       ret = arm_smmu_master_alloc_smes(dev);
+       if (ret)
+               goto out_cfg_free;
+
+       iommu_device_link(&smmu->iommu, dev);
+
+       return 0;
+
+out_cfg_free:
+       kfree(cfg);
+out_free:
+       iommu_fwspec_free(dev);
+       return ret;
+}
+
+void arm_smmu_remove_device_common(struct device *dev, struct iommu_ops *ops)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_master_cfg *cfg;
+       struct arm_smmu_device *smmu;
+
+       if (!fwspec || fwspec->ops != ops)
+               return;
+
+       cfg  = fwspec->iommu_priv;
+       smmu = cfg->smmu;
+
+       iommu_device_unlink(&smmu->iommu, dev);
+       arm_smmu_master_free_smes(fwspec);
+       iommu_group_remove_device(dev);
+       kfree(fwspec->iommu_priv);
+       iommu_fwspec_free(dev);
+}
+
+struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+       struct iommu_group *group = NULL;
+       int i, idx;
+
+       for_each_cfg_sme(fwspec, i, idx) {
+               if (group && smmu->s2crs[idx].group &&
+                   group != smmu->s2crs[idx].group)
+                       return ERR_PTR(-EINVAL);
+
+               group = smmu->s2crs[idx].group;
+       }
+
+       if (group)
+               return iommu_group_ref_get(group);
+
+       if (dev_is_pci(dev))
+               group = pci_device_group(dev);
+       else if (dev_is_fsl_mc(dev))
+               group = fsl_mc_device_group(dev);
+       else
+               group = generic_device_group(dev);
+
+       return group;
+}
+
+int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+                            enum iommu_attr attr, void *data)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       switch (domain->type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               switch (attr) {
+               case DOMAIN_ATTR_NESTING:
+                       *(int *)data = (smmu_domain->stage ==
+                                       ARM_SMMU_DOMAIN_NESTED);
+                       return 0;
+               default:
+                       return -ENODEV;
+               }
+               break;
+       case IOMMU_DOMAIN_DMA:
+               switch (attr) {
+               case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+                       *(int *)data = smmu_domain->non_strict;
+                       return 0;
+               default:
+                       return -ENODEV;
+               }
+               break;
+       default:
+               return -EINVAL;
+       }
+}
+
+int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+                            enum iommu_attr attr, void *data)
+{
+       int ret = 0;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       mutex_lock(&smmu_domain->init_mutex);
+
+       switch (domain->type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               switch (attr) {
+               case DOMAIN_ATTR_NESTING:
+                       if (smmu_domain->smmu) {
+                               ret = -EPERM;
+                               goto out_unlock;
+                       }
+
+                       if (*(int *)data)
+                               smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+                       else
+                               smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+                       break;
+               default:
+                       ret = -ENODEV;
+               }
+               break;
+       case IOMMU_DOMAIN_DMA:
+               switch (attr) {
+               case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+                       smmu_domain->non_strict = *(int *)data;
+                       break;
+               default:
+                       ret = -ENODEV;
+               }
+               break;
+       default:
+               ret = -EINVAL;
+       }
+out_unlock:
+       mutex_unlock(&smmu_domain->init_mutex);
+       return ret;
+}
+
+int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+       u32 mask, fwid = 0;
+
+       if (args->args_count > 0)
+               fwid |= (u16)args->args[0];
+
+       if (args->args_count > 1)
+               fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+       else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
+               fwid |= (u16)mask << SMR_MASK_SHIFT;
+
+       return iommu_fwspec_add_ids(dev, &fwid, 1);
+}
+
+void arm_smmu_get_resv_regions(struct device *dev,
+                              struct list_head *head)
+{
+       struct iommu_resv_region *region;
+       int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+       region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+                                        prot, IOMMU_RESV_SW_MSI);
+       if (!region)
+               return;
+
+       list_add_tail(&region->list, head);
+
+       iommu_dma_get_resv_regions(dev, head);
+}
+
+void arm_smmu_put_resv_regions(struct device *dev,
+                              struct list_head *head)
+{
+       struct iommu_resv_region *entry, *next;
+
+       list_for_each_entry_safe(entry, next, head, list)
+               kfree(entry);
+}
+
+void arm_smmu_device_reset(struct arm_smmu_device *smmu)
+{
+       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       int i;
+       u32 reg, major;
+
+       /* clear global FSR */
+       reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
+       writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
+
+       /*
+        * Reset stream mapping groups: Initial values mark all SMRn as
+        * invalid and all S2CRn as bypass unless overridden.
+        */
+       for (i = 0; i < smmu->num_mapping_groups; ++i)
+               arm_smmu_write_sme(smmu, i);
+
+       if (smmu->model == ARM_MMU500) {
+               /*
+                * Before clearing ARM_MMU500_ACTLR_CPRE, need to
+                * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
+                * bit is only present in MMU-500r2 onwards.
+                */
+               reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
+               major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
+               reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
+               if (major >= 2)
+                       reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+               /*
+                * Allow unmatched Stream IDs to allocate bypass
+                * TLB entries for reduced latency.
+                */
+               reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
+               writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
+       }
+
+       /* Make sure all context banks are disabled and clear CB_FSR  */
+       for (i = 0; i < smmu->num_context_banks; ++i) {
+               void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
+
+               arm_smmu_write_context_bank(smmu, i);
+               writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
+               /*
+                * Disable MMU-500's not-particularly-beneficial next-page
+                * prefetcher for the sake of errata #841119 and #826419.
+                */
+               if (smmu->model == ARM_MMU500) {
+                       reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
+                       reg &= ~ARM_MMU500_ACTLR_CPRE;
+                       writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
+               }
+       }
+
+       /* Invalidate the TLB, just in case */
+       writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
+       writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
+
+       reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+
+       /* Enable fault reporting */
+       reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
+
+       /* Disable TLB broadcasting. */
+       reg |= (sCR0_VMIDPNE | sCR0_PTM);
+
+       /* Enable client access, handling unmatched streams as appropriate */
+       reg &= ~sCR0_CLIENTPD;
+       if (smmu->disable_bypass)
+               reg |= sCR0_USFCFG;
+       else
+               reg &= ~sCR0_USFCFG;
+
+       /* Disable forced broadcasting */
+       reg &= ~sCR0_FB;
+
+       /* Don't upgrade barriers */
+       reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
+
+       if (smmu->features & ARM_SMMU_FEAT_VMID16)
+               reg |= sCR0_VMID16EN;
+
+       if (smmu->features & ARM_SMMU_FEAT_EXIDS)
+               reg |= sCR0_EXIDENABLE;
+
+       /* Push the button */
+       arm_smmu_tlb_sync_global(smmu);
+       writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+}
+
+static int arm_smmu_id_size_to_bits(int size)
+{
+       switch (size) {
+       case 0:
+               return 32;
+       case 1:
+               return 36;
+       case 2:
+               return 40;
+       case 3:
+               return 42;
+       case 4:
+               return 44;
+       case 5:
+       default:
+               return 48;
+       }
+}
+
+int arm_smmu_device_cfg_probe_common(struct arm_smmu_device *smmu,
+       struct iommu_ops *ops, atomic_t *context_count, int force_stage)
+{
+       unsigned long size;
+       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       u32 id;
+       bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
+       int i;
+
+       dev_notice(smmu->dev, "probing hardware configuration...\n");
+       dev_notice(smmu->dev, "SMMUv%d with:\n",
+                       smmu->version == ARM_SMMU_V2 ? 2 : 1);
+
+       /* ID0 */
+       id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
+
+       /* Restrict available stages based on module parameter */
+       if (force_stage == 1)
+               id &= ~(ID0_S2TS | ID0_NTS);
+       else if (force_stage == 2)
+               id &= ~(ID0_S1TS | ID0_NTS);
+
+       if (id & ID0_S1TS) {
+               smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
+               dev_notice(smmu->dev, "\tstage 1 translation\n");
+       }
+
+       if (id & ID0_S2TS) {
+               smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
+               dev_notice(smmu->dev, "\tstage 2 translation\n");
+       }
+
+       if (id & ID0_NTS) {
+               smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
+               dev_notice(smmu->dev, "\tnested translation\n");
+       }
+
+       if (!(smmu->features &
+               (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
+               dev_err(smmu->dev, "\tno translation support!\n");
+               return -ENODEV;
+       }
+
+       if ((id & ID0_S1TS) &&
+               ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
+               smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
+               dev_notice(smmu->dev, "\taddress translation ops\n");
+       }
+
+       /*
+        * In order for DMA API calls to work properly, we must defer to what
+        * the FW says about coherency, regardless of what the hardware claims.
+        * Fortunately, this also opens up a workaround for systems where the
+        * ID register value has ended up configured incorrectly.
+        */
+       cttw_reg = !!(id & ID0_CTTW);
+       if (cttw_fw || cttw_reg)
+               dev_notice(smmu->dev, "\t%scoherent table walk\n",
+                          cttw_fw ? "" : "non-");
+       if (cttw_fw != cttw_reg)
+               dev_notice(smmu->dev,
+                          "\t(IDR0.CTTW overridden by FW configuration)\n");
+
+       /* Max. number of entries we have for stream matching/indexing */
+       if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
+               smmu->features |= ARM_SMMU_FEAT_EXIDS;
+               size = 1 << 16;
+       } else {
+               size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+       }
+       smmu->streamid_mask = size - 1;
+       if (id & ID0_SMS) {
+               smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
+               size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
+               if (size == 0) {
+                       dev_err(smmu->dev,
+                               "stream-matching supported, but no SMRs present!\n");
+                       return -ENODEV;
+               }
+
+               /* Zero-initialised to mark as invalid */
+               smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
+                                         GFP_KERNEL);
+               if (!smmu->smrs)
+                       return -ENOMEM;
+
+               dev_notice(smmu->dev,
+                          "\tstream matching with %lu register groups", size);
+       }
+       /* s2cr->type == 0 means translation, so initialise explicitly */
+       smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
+                                        GFP_KERNEL);
+       if (!smmu->s2crs)
+               return -ENOMEM;
+       for (i = 0; i < size; i++)
+               smmu->s2crs[i] = s2cr_init_val;
+
+       smmu->num_mapping_groups = size;
+       mutex_init(&smmu->stream_map_mutex);
+       spin_lock_init(&smmu->global_sync_lock);
+
+       if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
+               smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
+               if (!(id & ID0_PTFS_NO_AARCH32S))
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
+       }
+
+       /* ID1 */
+       id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
+       smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
+
+       /* Check for size mismatch of SMMU address space from mapped region */
+       size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
+       size <<= smmu->pgshift;
+       if (smmu->cb_base != gr0_base + size)
+               dev_warn(smmu->dev,
+                       "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
+                       size * 2, (smmu->cb_base - gr0_base) * 2);
+
+       smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
+       smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
+       if (smmu->num_s2_context_banks > smmu->num_context_banks) {
+               dev_err(smmu->dev, "impossible number of S2 context banks!\n");
+               return -ENODEV;
+       }
+       dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
+                  smmu->num_context_banks, smmu->num_s2_context_banks);
+       /*
+        * Cavium CN88xx erratum #27704.
+        * Ensure ASID and VMID allocation is unique across all SMMUs in
+        * the system.
+        */
+       if (smmu->model == CAVIUM_SMMUV2) {
+               smmu->cavium_id_base = atomic_add_return(
+                               smmu->num_context_banks, context_count);
+               smmu->cavium_id_base -= smmu->num_context_banks;
+               dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
+       }
+       smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
+                                sizeof(*smmu->cbs), GFP_KERNEL);
+       if (!smmu->cbs)
+               return -ENOMEM;
+
+       /* ID2 */
+       id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
+       size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
+       smmu->ipa_size = size;
+
+       /* The output mask is also applied for bypass */
+       size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
+       smmu->pa_size = size;
+
+       if (id & ID2_VMID16)
+               smmu->features |= ARM_SMMU_FEAT_VMID16;
+
+       /*
+        * What the page table walker can address actually depends on which
+        * descriptor format is in use, but since a) we don't know that yet,
+        * and b) it can vary per context bank, this will have to do...
+        */
+       if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
+               dev_warn(smmu->dev,
+                        "failed to set DMA mask for table walker\n");
+
+       if (smmu->version < ARM_SMMU_V2) {
+               smmu->va_size = smmu->ipa_size;
+               if (smmu->version == ARM_SMMU_V1_64K)
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
+       } else {
+               size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
+               smmu->va_size = arm_smmu_id_size_to_bits(size);
+               if (id & ID2_PTFS_4K)
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
+               if (id & ID2_PTFS_16K)
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
+               if (id & ID2_PTFS_64K)
+                       smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
+       }
+
+       /* Now we've corralled the various formats, what'll it do? */
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
+               smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
+       if (smmu->features &
+           (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
+               smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
+               smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
+       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
+               smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
+
+       if (ops->pgsize_bitmap == -1UL)
+               ops->pgsize_bitmap = smmu->pgsize_bitmap;
+       else
+               ops->pgsize_bitmap |= smmu->pgsize_bitmap;
+       dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
+                  smmu->pgsize_bitmap);
+
+
+       if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
+               dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
+                          smmu->va_size, smmu->ipa_size);
+
+       if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
+               dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
+                          smmu->ipa_size, smmu->pa_size);
+
+       return 0;
+}
+
+int arm_smmu_device_remove(struct platform_device *pdev)
+{
+       struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+       if (!smmu)
+               return -ENODEV;
+
+       if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
+               dev_err(&pdev->dev, "removing device with active domains!\n");
+
+       /* Turn the thing off */
+       writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+       return 0;
+}
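
For reference, here is a minimal sketch (not part of this patch) of how a vendor driver's probe/remove paths might call into these library routines. The vendor_smmu_* names, the IRQ wiring and the force_stage value of 0 are illustrative assumptions, not code from this series:

#include <linux/atomic.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include "lib-arm-smmu.h"

static atomic_t cavium_context_count = ATOMIC_INIT(0);
static struct iommu_ops vendor_smmu_ops;	/* hypothetical, built from the lib helpers */

static int vendor_smmu_probe(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu;
	int irq, err;

	smmu = devm_kzalloc(&pdev->dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = &pdev->dev;

	/* ... vendor code maps registers and fills base/cb_base/version/model ... */

	/* Read the ID registers and size SMRs/S2CRs/context banks */
	err = arm_smmu_device_cfg_probe_common(smmu, &vendor_smmu_ops,
					       &cavium_context_count,
					       0 /* force_stage */);
	if (err)
		return err;

	/* Report global faults through the shared handler */
	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;
	err = devm_request_irq(smmu->dev, irq, arm_smmu_global_fault,
			       IRQF_SHARED, "vendor-smmu global fault", smmu);
	if (err)
		return err;

	arm_smmu_test_smr_masks(smmu);
	arm_smmu_device_reset(smmu);

	/* arm_smmu_device_remove() fetches the device via drvdata */
	platform_set_drvdata(pdev, smmu);
	return 0;
}

static struct platform_driver vendor_smmu_driver = {
	.driver	= { .name = "vendor-smmu" },
	.probe	= vendor_smmu_probe,
	.remove	= arm_smmu_device_remove,
};
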
diff --git a/drivers/iommu/lib-arm-smmu.h b/drivers/iommu/lib-arm-smmu.h
new file mode 100644
index 0000000..d2f3980
--- /dev/null
+++ b/drivers/iommu/lib-arm-smmu.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2018, NVIDIA Corporation
+ * Author: Krishna Reddy <vdu...@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LIB_ARM_SMMU_H
+#define _LIB_ARM_SMMU_H
+
+#include <linux/iommu.h>
+#include <linux/irqreturn.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+/* Maximum number of context banks per SMMU */
+#define ARM_SMMU_MAX_CBS               128
+
+enum arm_smmu_arch_version {
+       ARM_SMMU_V1,
+       ARM_SMMU_V1_64K,
+       ARM_SMMU_V2,
+};
+
+enum arm_smmu_implementation {
+       GENERIC_SMMU,
+       ARM_MMU500,
+       CAVIUM_SMMUV2,
+};
+
+struct arm_smmu_s2cr {
+       struct iommu_group              *group;
+       int                             count;
+       enum arm_smmu_s2cr_type         type;
+       enum arm_smmu_s2cr_privcfg      privcfg;
+       u8                              cbndx;
+};
+
+struct arm_smmu_smr {
+       u16                             mask;
+       u16                             id;
+       bool                            valid;
+};
+
+struct arm_smmu_cb {
+       u64                             ttbr[2];
+       u32                             tcr[2];
+       u32                             mair[2];
+       struct arm_smmu_cfg             *cfg;
+};
+
+struct arm_smmu_device {
+       struct device                   *dev;
+
+       void __iomem                    *base;
+       void __iomem                    *cb_base;
+       /*
+        * Number of ARM SMMUs represented by this arm_smmu_device.
+        * Tegra194 uses two ARM SMMU instances as one SMMU device.
+        */
+       u32                             num_smmus;
+       /* To hold the multiple base addresses when num_smmus > 1 */
+       void __iomem                    **bases;
+       unsigned long                   pgshift;
+
+#define ARM_SMMU_FEAT_COHERENT_WALK    (1 << 0)
+#define ARM_SMMU_FEAT_STREAM_MATCH     (1 << 1)
+#define ARM_SMMU_FEAT_TRANS_S1         (1 << 2)
+#define ARM_SMMU_FEAT_TRANS_S2         (1 << 3)
+#define ARM_SMMU_FEAT_TRANS_NESTED     (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS                (1 << 5)
+#define ARM_SMMU_FEAT_VMID16           (1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K   (1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K  (1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K  (1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L    (1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S    (1 << 11)
+#define ARM_SMMU_FEAT_EXIDS            (1 << 12)
+       u32                             features;
+
+#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
+       u32                             options;
+       enum arm_smmu_arch_version      version;
+       enum arm_smmu_implementation    model;
+
+       u32                             num_context_banks;
+       u32                             num_s2_context_banks;
+       DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
+       struct arm_smmu_cb              *cbs;
+       atomic_t                        irptndx;
+
+       u32                             num_mapping_groups;
+       u16                             streamid_mask;
+       u16                             smr_mask_mask;
+       struct arm_smmu_smr             *smrs;
+       struct arm_smmu_s2cr            *s2crs;
+       struct mutex                    stream_map_mutex;
+
+       unsigned long                   va_size;
+       unsigned long                   ipa_size;
+       unsigned long                   pa_size;
+       unsigned long                   pgsize_bitmap;
+
+       u32                             num_global_irqs;
+       u32                             num_context_irqs;
+       unsigned int                    *irqs;
+
+       u32                             cavium_id_base; /* Specific to Cavium */
+
+       spinlock_t                      global_sync_lock;
+       bool                            disable_bypass;
+
+       /* IOMMU core code handle */
+       struct iommu_device             iommu;
+};
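
To illustrate the multi-instance fields above, here is a hypothetical helper (not part of this patch) that a vendor front-end might use to populate num_smmus and bases[] from multiple reg entries; the helper name and the assumption that smmu->base aliases bases[0] are mine, not taken from this series:

#include <linux/err.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include "lib-arm-smmu.h"

/* Hypothetical helper: map one MMIO region per SMMU instance */
static int vendor_smmu_map_instances(struct platform_device *pdev,
				     struct arm_smmu_device *smmu,
				     u32 num_smmus)
{
	struct resource *res;
	u32 i;

	smmu->num_smmus = num_smmus;
	smmu->bases = devm_kcalloc(smmu->dev, num_smmus,
				   sizeof(*smmu->bases), GFP_KERNEL);
	if (!smmu->bases)
		return -ENOMEM;

	for (i = 0; i < num_smmus; i++) {
		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
		smmu->bases[i] = devm_ioremap_resource(smmu->dev, res);
		if (IS_ERR(smmu->bases[i]))
			return PTR_ERR(smmu->bases[i]);
	}

	/*
	 * Assumption: the first instance doubles as smmu->base so the
	 * single-instance register macros keep working unchanged.
	 */
	smmu->base = smmu->bases[0];
	return 0;
}
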
+
+/* Common programming functions */
+int arm_smmu_device_cfg_probe_common(
+       struct arm_smmu_device *smmu, struct iommu_ops *ops,
+       atomic_t *cavium_smmu_context_count, int force_stage);
+irqreturn_t arm_smmu_global_fault(int irq, void *dev);
+int arm_smmu_device_remove(struct platform_device *pdev);
+void arm_smmu_device_reset(struct arm_smmu_device *smmu);
+void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu);
+
+/* For IOMMU ops */
+bool arm_smmu_capable(enum iommu_cap cap);
+struct iommu_domain *arm_smmu_domain_alloc_common(unsigned int type,
+                                                 bool using_legacy_binding);
+void arm_smmu_domain_free(struct iommu_domain *domain);
+int arm_smmu_attach_dev_common(struct iommu_domain *domain, struct device *dev,
+                              struct iommu_ops *ops);
+int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
+                phys_addr_t paddr, size_t size, int prot);
+size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+                     size_t size);
+void arm_smmu_flush_iotlb_all(struct iommu_domain *domain);
+void arm_smmu_iotlb_sync(struct iommu_domain *domain);
+phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+                                 dma_addr_t iova);
+int arm_smmu_add_device_common(struct device *dev,
+                              struct arm_smmu_device *smmu);
+void arm_smmu_remove_device_common(struct device *dev, struct iommu_ops *ops);
+struct iommu_group *arm_smmu_device_group(struct device *dev);
+int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+                            enum iommu_attr attr, void *data);
+int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+                            enum iommu_attr attr, void *data);
+int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args);
+void arm_smmu_get_resv_regions(struct device *dev,
+                              struct list_head *head);
+void arm_smmu_put_resv_regions(struct device *dev,
+                              struct list_head *head);
+
+#endif
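
To show how the per-ops helpers above are intended to be consumed, here is a sketch (not part of this patch) of a vendor iommu_ops table built on them. The vendor_* wrappers and the vendor_dev_to_smmu() lookup are hypothetical glue; the _common variants that need extra arguments are wrapped accordingly:

#include <linux/iommu.h>

#include "lib-arm-smmu.h"

static struct iommu_ops vendor_smmu_ops;

/* Hypothetical: vendor-specific lookup of the SMMU owning this master */
static struct arm_smmu_device *vendor_dev_to_smmu(struct device *dev);

/* The _common helpers take extra arguments, so wrap them with thin glue */
static struct iommu_domain *vendor_domain_alloc(unsigned int type)
{
	return arm_smmu_domain_alloc_common(type, false /* using_legacy_binding */);
}

static int vendor_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	return arm_smmu_attach_dev_common(domain, dev, &vendor_smmu_ops);
}

static int vendor_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu = vendor_dev_to_smmu(dev);

	return arm_smmu_add_device_common(dev, smmu);
}

static void vendor_remove_device(struct device *dev)
{
	arm_smmu_remove_device_common(dev, &vendor_smmu_ops);
}

static struct iommu_ops vendor_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= vendor_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= vendor_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= vendor_add_device,
	.remove_device		= vendor_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL,	/* narrowed by arm_smmu_device_cfg_probe_common() */
};
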
-- 
2.1.4
