AMD IOMMU v1 is unique in supporting contiguous pages of variable size, and it can decode the full 64 bit VA space. Unlike other x86 page tables it explicitly does not do sign extension, which is part of how the entire 64 bit VA space is supported.
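To illustrate the variable-size encoding (this sketch is not part of the patch): a leaf PTE with Next Level == 7 encodes its size as a run of 1s in the low bits of the OA field, which amdv1pt_install_leaf_entry() writes and amdv1pt_entry_num_contig_lg2()/amdv1pt_entry_oa() decode. The standalone C below restates that arithmetic outside the kernel; the helper names are illustrative only and a 4K granule is assumed:

/* Illustrative only -- mirrors the encoding used in amdv1.h, 4K granule */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GRANULE_LG2 12

/* Build the PTE OA field for an aligned oa covering 2^oasz_lg2 bytes */
static uint64_t encode_oa_field(uint64_t oa, unsigned int oasz_lg2)
{
	uint64_t field = oa >> GRANULE_LG2;

	/* Next Level == 7: a run of 1s in the low bits selects the size */
	if (oasz_lg2 > GRANULE_LG2)
		field |= (1ULL << (oasz_lg2 - GRANULE_LG2 - 1)) - 1;
	return field;
}

/* Recover the size of a Next Level == 7 entry: find the first zero bit */
static unsigned int decode_oasz_lg2(uint64_t oa_field)
{
	unsigned int bit = 0;

	while (oa_field & (1ULL << bit))
		bit++;
	return GRANULE_LG2 + bit + 1;
}

int main(void)
{
	/* A 32K leaf at OA 0x40000000: OA field bits [2:0] become 011b */
	uint64_t field = encode_oa_field(0x40000000ULL, 15);

	assert((field & 0x7) == 0x3);
	assert(decode_oasz_lg2(field) == 15);
	printf("oa field %#llx -> 2^%u byte page\n",
	       (unsigned long long)field, decode_oasz_lg2(field));
	return 0;
}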
The general design is quite similar to the x86 PAE format, except with a 6th level and a quite different PTE encoding. This format is the only one that uses the PT_FEAT_DYNAMIC_TOP feature, since the existing AMDv1 code starts out with a 3 level table and adds levels on the fly if more IOVA is needed.

Comparing the performance of several operations to the existing version:

iommu_map()
 pgsz      , avg new,old ns , min new,old ns , min % (+ve is better)
 2^12      ,   65,64    ,   62,61    ,  -1.01
 2^13      ,   70,66    ,   67,62    ,  -8.08
 2^14      ,   73,69    ,   71,65    ,  -9.09
 2^15      ,   78,75    ,   75,71    ,  -5.05
 2^16      ,   89,89    ,   86,84    ,  -2.02
 2^17      ,  128,121   ,  124,112   , -10.10
 2^18      ,  175,175   ,  170,163   ,  -4.04
 2^19      ,  264,306   ,  261,279   ,   6.06
 2^20      ,  444,525   ,  438,489   ,  10.10
 2^21      ,   60,62    ,   58,59    ,   1.01
 256*2^12  ,  381,1833  ,  367,1795  ,  79.79
 256*2^21  ,  375,1623  ,  356,1555  ,  77.77
 256*2^30  ,  356,1338  ,  349,1277  ,  72.72

iommu_unmap()
 pgsz      , avg new,old ns , min new,old ns , min % (+ve is better)
 2^12      ,   76,89    ,   71,86    ,  17.17
 2^13      ,   79,89    ,   75,86    ,  12.12
 2^14      ,   78,90    ,   74,86    ,  13.13
 2^15      ,   82,89    ,   74,86    ,  13.13
 2^16      ,   79,89    ,   74,86    ,  13.13
 2^17      ,   81,89    ,   77,87    ,  11.11
 2^18      ,   90,92    ,   87,89    ,   2.02
 2^19      ,   91,93    ,   88,90    ,   2.02
 2^20      ,   96,95    ,   91,92    ,   1.01
 2^21      ,   72,88    ,   68,85    ,  20.20
 256*2^12  ,  372,6583  ,  364,6251  ,  94.94
 256*2^21  ,  398,6032  ,  392,5758  ,  93.93
 256*2^30  ,  396,5665  ,  389,5258  ,  92.92

The ~5-17x speedup when working with multi-PTE map/unmaps is because the AMD implementation rewalks the entire table on every new PTE while this version retains its position. The same speedup will be seen for dirty tracking as well.

The old implementation triggers a compiler optimization that ends up generating a "rep stos" memset for contiguous PTEs. Since AMD can have contiguous PTEs that span 2Kbytes of table this is a huge win compared to a normal movq loop. It is why the unmap side has a fairly flat runtime as the contiguous PTE size increases. This version makes it explicit with a memset64() call.

Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
---
 drivers/iommu/Makefile                     |   1 +
 drivers/iommu/generic_pt/Kconfig           |  13 +
 drivers/iommu/generic_pt/fmt/Makefile      |  11 +
 drivers/iommu/generic_pt/fmt/amdv1.h       | 383 +++++++++++++++++++++
 drivers/iommu/generic_pt/fmt/defs_amdv1.h  |  21 ++
 drivers/iommu/generic_pt/fmt/iommu_amdv1.c |  15 +
 include/linux/generic_pt/common.h          |  19 +
 include/linux/generic_pt/iommu.h           |  29 ++
 8 files changed, 492 insertions(+)
 create mode 100644 drivers/iommu/generic_pt/fmt/Makefile
 create mode 100644 drivers/iommu/generic_pt/fmt/amdv1.h
 create mode 100644 drivers/iommu/generic_pt/fmt/defs_amdv1.h
 create mode 100644 drivers/iommu/generic_pt/fmt/iommu_amdv1.c

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index fe91d770abe16c..da5ff67134e6e5 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += amd/ intel/ arm/ iommufd/ riscv/
+obj-$(CONFIG_GENERIC_PT) += generic_pt/fmt/
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_SUPPORT) += iommu-pages.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig
index 73b7a54375f9bd..887c585a66699a 100644
--- a/drivers/iommu/generic_pt/Kconfig
+++ b/drivers/iommu/generic_pt/Kconfig
@@ -32,4 +32,17 @@ config IOMMU_PT
	  IOMMU_PT provides an implementation of the page table operations
	  related struct iommu_domain using GENERIC_PT to abstract the page
	  table format.
+
+if IOMMU_PT
+config IOMMU_PT_AMDV1
+	tristate "IOMMU page table for 64 bit AMD IOMMU v1"
+	depends on !GENERIC_ATOMIC64 # for cmpxchg64
+	default n
+	help
+	  iommu_domain implementation for the AMD v1 page table. AMDv1 is the
+	  "host" page table. It supports granular page sizes of almost every
+	  power of 2 and decodes a full 64 bit IOVA space.
+
+	  Selected automatically by an IOMMU driver that uses this format.
+endif
 endif
diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile
new file mode 100644
index 00000000000000..a4d83b7e0cf691
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_AMDV1) += amdv1
+
+define create_format
+obj-$(2) += iommu_$(1).o
+
+endef
+
+$(eval $(foreach fmt,$(iommu_pt_fmt-y),$(call create_format,$(fmt),y)))
+$(eval $(foreach fmt,$(iommu_pt_fmt-m),$(call create_format,$(fmt),m)))
diff --git a/drivers/iommu/generic_pt/fmt/amdv1.h b/drivers/iommu/generic_pt/fmt/amdv1.h
new file mode 100644
index 00000000000000..0f445ab59c8495
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/amdv1.h
@@ -0,0 +1,383 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ * AMD IOMMU v1 page table
+ *
+ * This is described in Section "2.2.3 I/O Page Tables for Host Translations"
+ * of the "AMD I/O Virtualization Technology (IOMMU) Specification"
+ *
+ * Note the level numbering here matches the core code, so level 0 is the same
+ * as mode 1.
+ *
+ */
+#ifndef __GENERIC_PT_FMT_AMDV1_H
+#define __GENERIC_PT_FMT_AMDV1_H
+
+#include "defs_amdv1.h"
+#include "../pt_defs.h"
+
+#include <asm/page.h>
+#include <linux/bitfield.h>
+#include <linux/container_of.h>
+#include <linux/mem_encrypt.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+enum {
+	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
+	PT_MAX_VA_ADDRESS_LG2 = 64,
+	PT_ENTRY_WORD_SIZE = sizeof(u64),
+	PT_MAX_TOP_LEVEL = 5,
+	PT_GRANULE_LG2SZ = 12,
+	PT_TABLEMEM_LG2SZ = 12,
+};
+
+/* PTE bits */
+enum {
+	AMDV1PT_FMT_PR = BIT(0),
+	AMDV1PT_FMT_D = BIT(6),
+	AMDV1PT_FMT_NEXT_LEVEL = GENMASK_ULL(11, 9),
+	AMDV1PT_FMT_OA = GENMASK_ULL(51, 12),
+	AMDV1PT_FMT_FC = BIT_ULL(60),
+	AMDV1PT_FMT_IR = BIT_ULL(61),
+	AMDV1PT_FMT_IW = BIT_ULL(62),
+};
+
+/*
+ * gcc 13 has a bug where it thinks the output of FIELD_GET() is an enum, make
+ * these defines to avoid it.
+ */
+#define AMDV1PT_FMT_NL_DEFAULT 0
+#define AMDV1PT_FMT_NL_SIZE 7
+
+#define common_to_amdv1pt(common_ptr) \
+	container_of_const(common_ptr, struct pt_amdv1, common)
+#define to_amdv1pt(pts) common_to_amdv1pt((pts)->range->common)
+
+static inline pt_oaddr_t amdv1pt_table_pa(const struct pt_state *pts)
+{
+	return oalog2_mul(FIELD_GET(AMDV1PT_FMT_OA, pts->entry),
+			  PT_GRANULE_LG2SZ);
+}
+#define pt_table_pa amdv1pt_table_pa
+
+/* Returns the oa for the start of the contiguous entry */
+static inline pt_oaddr_t amdv1pt_entry_oa(const struct pt_state *pts)
+{
+	pt_oaddr_t oa = FIELD_GET(AMDV1PT_FMT_OA, pts->entry);
+
+	if (FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) ==
+	    AMDV1PT_FMT_NL_SIZE) {
+		unsigned int sz_bits = oalog2_ffz(oa);
+
+		oa = oalog2_set_mod(oa, 0, sz_bits);
+	} else if (PT_WARN_ON(FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) !=
+			      AMDV1PT_FMT_NL_DEFAULT))
+		return 0;
+	return oalog2_mul(oa, PT_GRANULE_LG2SZ);
+}
+#define pt_entry_oa amdv1pt_entry_oa
+
+static inline bool amdv1pt_can_have_leaf(const struct pt_state *pts)
+{
+	/*
+	 * Table 15: Page Table Level Parameters
+	 * The top most level cannot have translation entries
+	 */
+	return pts->level < PT_MAX_TOP_LEVEL;
+}
+#define pt_can_have_leaf amdv1pt_can_have_leaf
+
+static inline unsigned int amdv1pt_table_item_lg2sz(const struct pt_state *pts)
+{
+	return PT_GRANULE_LG2SZ +
+	       (PT_TABLEMEM_LG2SZ - ilog2(PT_ENTRY_WORD_SIZE)) * pts->level;
+}
+#define pt_table_item_lg2sz amdv1pt_table_item_lg2sz
+
+static inline unsigned int
+amdv1pt_entry_num_contig_lg2(const struct pt_state *pts)
+{
+	u32 code;
+
+	if (FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) ==
+	    AMDV1PT_FMT_NL_DEFAULT)
+		return ilog2(1);
+
+	PT_WARN_ON(FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry) !=
+		   AMDV1PT_FMT_NL_SIZE);
+
+	/*
+	 * The contiguous size is encoded in the length of a string of 1's in
+	 * the low bits of the OA. Reverse the equation:
+	 *   code = log2_to_int(num_contig_lg2 + item_lg2sz -
+	 *                      PT_GRANULE_LG2SZ - 1) - 1
+	 * Which can be expressed as:
+	 *   num_contig_lg2 = oalog2_ffz(code) + 1 -
+	 *                    (item_lg2sz - PT_GRANULE_LG2SZ)
+	 *
+	 * Assume the bit layout is correct and remove the masking. Reorganize
+	 * the equation to move all the arithmetic before the ffz.
+	 */
+	code = pts->entry >> (__bf_shf(AMDV1PT_FMT_OA) - 1 +
+			      pt_table_item_lg2sz(pts) - PT_GRANULE_LG2SZ);
+	return log2_ffz_t(u32, code);
+}
+#define pt_entry_num_contig_lg2 amdv1pt_entry_num_contig_lg2
+
+static inline unsigned int amdv1pt_num_items_lg2(const struct pt_state *pts)
+{
+	/*
+	 * Top entry covers bits [63:57] only, this is handled through
+	 * max_vasz_lg2.
+	 */
+	if (PT_WARN_ON(pts->level == 5))
+		return 7;
+	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 amdv1pt_num_items_lg2
+
+static inline pt_vaddr_t amdv1pt_possible_sizes(const struct pt_state *pts)
+{
+	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
+
+	if (!amdv1pt_can_have_leaf(pts))
+		return 0;
+
+	/*
+	 * Table 14: Example Page Size Encodings
+	 * Address bits 51:32 can be used to encode page sizes greater than 4
+	 * Gbytes. Address bits 63:52 are zero-extended.
+	 *
+	 * 512GB Pages are not supported due to a hardware bug.
+	 * Otherwise every power of two size is supported.
+	 */
+	return GENMASK_ULL(min(51, isz_lg2 + amdv1pt_num_items_lg2(pts) - 1),
+			   isz_lg2) & ~SZ_512G;
+}
+#define pt_possible_sizes amdv1pt_possible_sizes
+
+static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts)
+{
+	const u64 *tablep = pt_cur_table(pts, u64) + pts->index;
+	unsigned int next_level;
+	u64 entry;
+
+	pts->entry = entry = READ_ONCE(*tablep);
+	if (!(entry & AMDV1PT_FMT_PR))
+		return PT_ENTRY_EMPTY;
+
+	next_level = FIELD_GET(AMDV1PT_FMT_NEXT_LEVEL, pts->entry);
+	if (pts->level == 0 || next_level == AMDV1PT_FMT_NL_DEFAULT ||
+	    next_level == AMDV1PT_FMT_NL_SIZE)
+		return PT_ENTRY_OA;
+	return PT_ENTRY_TABLE;
+}
+#define pt_load_entry_raw amdv1pt_load_entry_raw
+
+static inline void
+amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+			   unsigned int oasz_lg2,
+			   const struct pt_write_attrs *attrs)
+{
+	unsigned int isz_lg2 = pt_table_item_lg2sz(pts);
+	u64 *tablep = pt_cur_table(pts, u64) + pts->index;
+	u64 entry;
+
+	entry = AMDV1PT_FMT_PR |
+		FIELD_PREP(AMDV1PT_FMT_OA, log2_div(oa, PT_GRANULE_LG2SZ)) |
+		attrs->descriptor_bits;
+
+	if (oasz_lg2 == isz_lg2) {
+		entry |= FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL,
+				    AMDV1PT_FMT_NL_DEFAULT);
+		WRITE_ONCE(*tablep, entry);
+	} else {
+		unsigned int num_contig_lg2 = oasz_lg2 - isz_lg2;
+		u64 *end = tablep + log2_to_int(num_contig_lg2);
+
+		entry |= FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL,
+				    AMDV1PT_FMT_NL_SIZE) |
+			 FIELD_PREP(AMDV1PT_FMT_OA,
+				    oalog2_to_int(oasz_lg2 - PT_GRANULE_LG2SZ -
+						  1) -
+					    1);
+
+		/* See amdv1pt_clear_entry() */
+		if (num_contig_lg2 <= ilog2(32)) {
+			for (; tablep != end; tablep++)
+				WRITE_ONCE(*tablep, entry);
+		} else {
+			memset64(tablep, entry, log2_to_int(num_contig_lg2));
+		}
+	}
+	pts->entry = entry;
+}
+#define pt_install_leaf_entry amdv1pt_install_leaf_entry
+
+static inline bool amdv1pt_install_table(struct pt_state *pts,
+					 pt_oaddr_t table_pa,
+					 const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64) + pts->index;
+	u64 entry;
+
+	/*
+	 * IR and IW are ANDed from the table levels along with the PTE. We
+	 * always control permissions from the PTE, so always set IR and IW for
+	 * tables.
+	 */
+	entry = AMDV1PT_FMT_PR |
+		FIELD_PREP(AMDV1PT_FMT_NEXT_LEVEL, pts->level) |
+		FIELD_PREP(AMDV1PT_FMT_OA,
+			   log2_div(table_pa, PT_GRANULE_LG2SZ)) |
+		AMDV1PT_FMT_IR | AMDV1PT_FMT_IW;
+	if (pts_feature(pts, PT_FEAT_AMDV1_ENCRYPT_TABLES))
+		entry = __sme_set(entry);
+	return pt_table_install64(tablep, entry, pts->entry);
+}
+#define pt_install_table amdv1pt_install_table
+
+static inline void amdv1pt_attr_from_entry(const struct pt_state *pts,
+					   struct pt_write_attrs *attrs)
+{
+	attrs->descriptor_bits =
+		pts->entry & (AMDV1PT_FMT_FC | AMDV1PT_FMT_IR | AMDV1PT_FMT_IW);
+}
+#define pt_attr_from_entry amdv1pt_attr_from_entry
+
+static inline void amdv1pt_clear_entry(struct pt_state *pts,
+				       unsigned int num_contig_lg2)
+{
+	u64 *tablep = pt_cur_table(pts, u64) + pts->index;
+	u64 *end = tablep + log2_to_int(num_contig_lg2);
+
+	/*
+	 * gcc generates rep stos for the io-pgtable code, and this difference
+	 * can show in microbenchmarks with larger contiguous page sizes.
+	 * rep is slower for small cases.
+	 */
+	if (num_contig_lg2 <= ilog2(32)) {
+		for (; tablep != end; tablep++)
+			WRITE_ONCE(*tablep, 0);
+	} else {
+		memset64(tablep, 0, log2_to_int(num_contig_lg2));
+	}
+}
+#define pt_clear_entry amdv1pt_clear_entry
+
+static inline bool amdv1pt_entry_write_is_dirty(const struct pt_state *pts)
+{
+	unsigned int num_contig_lg2 = amdv1pt_entry_num_contig_lg2(pts);
+	u64 *tablep = pt_cur_table(pts, u64) +
+		      log2_set_mod(pts->index, 0, num_contig_lg2);
+	u64 *end = tablep + log2_to_int(num_contig_lg2);
+
+	for (; tablep != end; tablep++)
+		if (READ_ONCE(*tablep) & AMDV1PT_FMT_D)
+			return true;
+	return false;
+}
+#define pt_entry_write_is_dirty amdv1pt_entry_write_is_dirty
+
+static inline void amdv1pt_entry_set_write_clean(struct pt_state *pts)
+{
+	unsigned int num_contig_lg2 = amdv1pt_entry_num_contig_lg2(pts);
+	u64 *tablep = pt_cur_table(pts, u64) +
+		      log2_set_mod(pts->index, 0, num_contig_lg2);
+	u64 *end = tablep + log2_to_int(num_contig_lg2);
+
+	for (; tablep != end; tablep++)
+		WRITE_ONCE(*tablep, READ_ONCE(*tablep) & ~(u64)AMDV1PT_FMT_D);
+}
+#define pt_entry_set_write_clean amdv1pt_entry_set_write_clean
+
+static inline bool amdv1pt_entry_make_write_dirty(struct pt_state *pts)
+{
+	u64 *tablep = pt_cur_table(pts, u64) + pts->index;
+	u64 new = pts->entry | AMDV1PT_FMT_D;
+
+	return try_cmpxchg64(tablep, &pts->entry, new);
+}
+#define pt_entry_make_write_dirty amdv1pt_entry_make_write_dirty
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_amdv1
+
+/* The common struct is in the per-format common struct */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+	return &container_of(iommu_table, struct pt_iommu_amdv1, iommu)
+			->amdpt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+	return &container_of(common, struct pt_iommu_amdv1, amdpt.common)->iommu;
+}
+
+static inline int amdv1pt_iommu_set_prot(struct pt_common *common,
+					 struct pt_write_attrs *attrs,
+					 unsigned int iommu_prot)
+{
+	u64 pte = 0;
+
+	if (pt_feature(common, PT_FEAT_AMDV1_FORCE_COHERENCE))
+		pte |= AMDV1PT_FMT_FC;
+	if (iommu_prot & IOMMU_READ)
+		pte |= AMDV1PT_FMT_IR;
+	if (iommu_prot & IOMMU_WRITE)
+		pte |= AMDV1PT_FMT_IW;
+
+	/*
+	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
+	 * control this. For now if the tables use sme_set then so do the ptes.
+	 */
+	if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES))
+		pte = __sme_set(pte);
+
+	attrs->descriptor_bits = pte;
+	return 0;
+}
+#define pt_iommu_set_prot amdv1pt_iommu_set_prot
+
+static inline int amdv1pt_iommu_fmt_init(struct pt_iommu_amdv1 *iommu_table,
+					 const struct pt_iommu_amdv1_cfg *cfg)
+{
+	struct pt_amdv1 *table = &iommu_table->amdpt;
+	unsigned int max_vasz_lg2 = PT_MAX_VA_ADDRESS_LG2;
+
+	if (cfg->starting_level == 0 || cfg->starting_level > PT_MAX_TOP_LEVEL)
+		return -EINVAL;
+
+	if (!pt_feature(&table->common, PT_FEAT_DYNAMIC_TOP) &&
+	    cfg->starting_level != PT_MAX_TOP_LEVEL)
+		max_vasz_lg2 = PT_GRANULE_LG2SZ +
+			       (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) *
+				       (cfg->starting_level + 1);
+
+	table->common.max_vasz_lg2 =
+		min(max_vasz_lg2, cfg->common.hw_max_vasz_lg2);
+	table->common.max_oasz_lg2 =
+		min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
+	pt_top_set_level(&table->common, cfg->starting_level);
+	return 0;
+}
+#define pt_iommu_fmt_init amdv1pt_iommu_fmt_init
+
+static inline void
+amdv1pt_iommu_fmt_hw_info(struct pt_iommu_amdv1 *table,
+			  const struct pt_range *top_range,
+			  struct pt_iommu_amdv1_hw_info *info)
+{
+	info->host_pt_root = virt_to_phys(top_range->top_table);
+	PT_WARN_ON(log2_mod_t(phys_addr_t, info->host_pt_root, 12));
+	info->mode = top_range->top_level + 1;
+}
+#define pt_iommu_fmt_hw_info amdv1pt_iommu_fmt_hw_info
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/defs_amdv1.h b/drivers/iommu/generic_pt/fmt/defs_amdv1.h
new file mode 100644
index 00000000000000..0b9614ca6d103c
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/defs_amdv1.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_AMDV1_H
+#define __GENERIC_PT_FMT_DEFS_AMDV1_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+typedef u64 pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct amdv1pt_write_attrs {
+	u64 descriptor_bits;
+	gfp_t gfp;
+};
+#define pt_write_attrs amdv1pt_write_attrs
+
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/iommu_amdv1.c b/drivers/iommu/generic_pt/fmt/iommu_amdv1.c
new file mode 100644
index 00000000000000..72a2337d0c5510
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/iommu_amdv1.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT amdv1
+#define PT_SUPPORTED_FEATURES                                          \
+	(BIT(PT_FEAT_FULL_VA) | BIT(PT_FEAT_DYNAMIC_TOP) |             \
+	 BIT(PT_FEAT_FLUSH_RANGE) | BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS) | \
+	 BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |                           \
+	 BIT(PT_FEAT_AMDV1_FORCE_COHERENCE))
+#define PT_FORCE_ENABLED_FEATURES                                       \
+	(BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) | \
+	 BIT(PT_FEAT_AMDV1_FORCE_COHERENCE))
+
+#include "iommu_template.h"
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 91869fad33fbdf..b127d8915d48fc 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -131,4 +131,23 @@ enum pt_features {
	PT_FEAT_FMT_START,
 };
 
+struct pt_amdv1 {
+	struct pt_common common;
+};
+
+enum {
+	/*
+	 * The memory backing the tables is encrypted. Use __sme_set() to adjust
+	 * the page table pointers in the tree. This only works with
+	 * CONFIG_AMD_MEM_ENCRYPT.
+	 */
+	PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START,
+	/*
+	 * The PTEs are set to prevent cache incoherent traffic, such as PCI no
+	 * snoop. This is set either at creation time or before the first map
+	 * operation.
+	 */
+	PT_FEAT_AMDV1_FORCE_COHERENCE,
+};
+
 #endif
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 636f856cc5fdcc..f454680027659d 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -109,4 +109,33 @@ struct pt_iommu_cfg {
	u8 hw_max_oasz_lg2;
 };
 
+/* Generate the exported function signatures from iommu_pt.h */
+#define IOMMU_PROTOTYPES(fmt)                                             \
+	int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table,          \
+				  const struct pt_iommu_##fmt##_cfg *cfg, \
+				  gfp_t gfp);                             \
+	void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table,      \
+				      struct pt_iommu_##fmt##_hw_info *info)
+#define IOMMU_FORMAT(fmt, member)           \
+	struct pt_iommu_##fmt {             \
+		struct pt_iommu iommu;      \
+		struct pt_##fmt member;     \
+	};                                  \
+	IOMMU_PROTOTYPES(fmt)
+
+
+struct pt_iommu_amdv1_cfg {
+	struct pt_iommu_cfg common;
+	unsigned int starting_level;
+};
+
+struct pt_iommu_amdv1_hw_info {
+	u64 host_pt_root;
+	u8 mode;
+};
+
+IOMMU_FORMAT(amdv1, amdpt);
+
+#undef IOMMU_PROTOTYPES
+#undef IOMMU_FORMAT
 #endif
-- 
2.43.0