The current amdvi_page_walk() is designed to be called by the replay() method. Rather than drastically altering it, introduce helpers to fetch guest PTEs that will be used by a page walker implementation.
Signed-off-by: Alejandro Jimenez <alejandro.j.jime...@oracle.com>
---
 hw/i386/amd_iommu.c | 125 ++++++++++++++++++++++++++++++++++++++++++++
 hw/i386/amd_iommu.h |  42 +++++++++++++++
 2 files changed, 167 insertions(+)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 0af873b66a31..d089fdc28ef1 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1563,6 +1563,131 @@ static const MemoryRegionOps amdvi_ir_ops = {
     }
 };
 
+/*
+ * For a PTE encoding a large page, return the page size it encodes as described
+ * by the AMD IOMMU Specification Table 14: Example Page Size Encodings.
+ * No need to adjust the value of the PTE to point to the first PTE in the large
+ * page since the encoding guarantees all "base" PTEs in the large page are the
+ * same.
+ */
+static uint64_t large_pte_page_size(uint64_t pte)
+{
+    assert(PTE_NEXT_LEVEL(pte) == 7);
+
+    /* Determine size of the large/contiguous page encoded in the PTE */
+    return PTE_LARGE_PAGE_SIZE(pte);
+}
+
+/*
+ * Helper function to fetch a PTE using AMD v1 pgtable format.
+ * Returns:
+ * -2: The Page Table Root could not be read from DTE, or IOVA is larger than
+ *     supported by current page table level encoded in DTE[Mode].
+ * -1: PTE could not be read from guest memory during a page table walk.
+ *     This means that the DTE has valid data, and one of the lower level
+ *     entries in the Page Table could not be read.
+ *  0: PTE is marked not present, or entry is 0.
+ * >0: Leaf PTE value resolved from walking Guest IO Page Table.
+ */
+static uint64_t __attribute__((unused))
+fetch_pte(AMDVIAddressSpace *as, const hwaddr address, uint64_t dte,
+          hwaddr *page_size)
+{
+    IOMMUAccessFlags perms = amdvi_get_perms(dte);
+
+    uint8_t level, mode;
+    uint64_t pte = dte, pte_addr;
+
+    *page_size = 0;
+
+    if (perms == IOMMU_NONE) {
+        return (uint64_t)-2;
+    }
+
+    /*
+     * The Linux kernel driver initializes the default mode to 3, corresponding
+     * to a 39-bit GPA space, where each entry in the pagetable translates to a
+     * 1GB (2^30) page size.
+     */
+    level = mode = get_pte_translation_mode(dte);
+    assert(mode > 0 && mode < 7);
+
+    /*
+     * If IOVA is larger than the max supported by the current pgtable level,
+     * there is nothing to do. This signals that the pagetable level should be
+     * increased, or is an address meant to have special behavior like
+     * invalidating the entire cache.
+     */
+    if (address > PT_LEVEL_MAX_ADDR(mode - 1)) {
+        /* IOVA too large for the current DTE */
+        return (uint64_t)-2;
+    }
+
+    do {
+        level -= 1;
+
+        /* Update the page_size */
+        *page_size = PTE_LEVEL_PAGE_SIZE(level);
+
+        /* Permission bits are ANDed at every level, including the DTE */
+        perms &= amdvi_get_perms(pte);
+        if (perms == IOMMU_NONE) {
+            return pte;
+        }
+
+        /* Not Present */
+        if (!IOMMU_PTE_PRESENT(pte)) {
+            return 0;
+        }
+
+        /* Large or Leaf PTE found */
+        if (PTE_NEXT_LEVEL(pte) == 7 || PTE_NEXT_LEVEL(pte) == 0) {
+            /* Leaf PTE found */
+            break;
+        }
+
+        /*
+         * Index the pgtable using the IOVA bits corresponding to current level
+         * and walk down to the lower level.
+         */
+        pte_addr = NEXT_PTE_ADDR(pte, level, address);
+        pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
+
+        if (pte == (uint64_t)-1) {
+            /*
+             * A returned PTE of -1 indicates a failure to read the page table
+             * entry from guest memory.
+             */
+            if (level == mode - 1) {
+                /* Failure to retrieve the Page Table from Root Pointer */
+                *page_size = 0;
+                return (uint64_t)-2;
+            } else {
+                /* Failure to read PTE. Page walk skips a page_size chunk */
+                return pte;
+            }
+        }
+    } while (level > 0);
+
+    /*
+     * Page walk ends when Next Level field on PTE shows that either a leaf PTE
+     * or a series of large PTEs have been reached. In the latter case, return
+     * the pointer to the first PTE of the series.
+     */
+    assert(level == 0 || PTE_NEXT_LEVEL(pte) == 0 || PTE_NEXT_LEVEL(pte) == 7);
+
+    /*
+     * In case the range starts in the middle of a contiguous page, need to
+     * return the first PTE
+     */
+    if (PTE_NEXT_LEVEL(pte) == 7) {
+        /* Update page_size with the large PTE page size */
+        *page_size = large_pte_page_size(pte);
+    }
+
+    return pte;
+}
+
 /*
  * Toggle between address translation and passthrough modes by enabling the
  * corresponding memory regions.
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index c89e7dc9947d..fc4d2f7a4575 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -25,6 +25,8 @@
 #include "hw/i386/x86-iommu.h"
 #include "qom/object.h"
 
+#define GENMASK64(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l))
+
 /* Capability registers */
 #define AMDVI_CAPAB_BAR_LOW 0x04
 #define AMDVI_CAPAB_BAR_HIGH 0x08
@@ -174,6 +176,46 @@
 #define AMDVI_GATS_MODE (2ULL << 12)
 #define AMDVI_HATS_MODE (2ULL << 10)
 
+/* Page Table format */
+
+#define AMDVI_PTE_PR (1ULL << 0)
+#define AMDVI_PTE_NEXT_LEVEL_MASK GENMASK64(11, 9)
+
+#define IOMMU_PTE_PRESENT(pte) ((pte) & AMDVI_PTE_PR)
+
+/* Using level=0 for leaf PTE at 4K page size */
+#define PT_LEVEL_SHIFT(level) (12 + ((level) * 9))
+
+/* Return IOVA bit group used to index the Page Table at specific level */
+#define PT_LEVEL_INDEX(level, iova) (((iova) >> PT_LEVEL_SHIFT(level)) & \
+                                     GENMASK64(8, 0))
+
+/* Return the max address for a specified level i.e. max_oaddr */
+#define PT_LEVEL_MAX_ADDR(x) (((x) < 5) ? \
+                              ((1ULL << PT_LEVEL_SHIFT((x + 1))) - 1) : \
+                              (~(0ULL)))
+
+/* Extract the NextLevel field from PTE/PDE */
+#define PTE_NEXT_LEVEL(pte) (((pte) & AMDVI_PTE_NEXT_LEVEL_MASK) >> 9)
+
+/* Take page table level and return default pagetable size for level */
+#define PTE_LEVEL_PAGE_SIZE(level) (1ULL << (PT_LEVEL_SHIFT(level)))
+
+/*
+ * Return address of lower level page table encoded in PTE and specified by
+ * current level and corresponding IOVA bit group at such level.
+ */
+#define NEXT_PTE_ADDR(pte, level, iova) (((pte) & AMDVI_DEV_PT_ROOT_MASK) + \
+                                         (PT_LEVEL_INDEX(level, iova) * 8))
+
+/*
+ * Take a PTE value with mode=0x07 and return the page size it encodes.
+ */
+#define PTE_LARGE_PAGE_SIZE(pte) (1ULL << (1 + cto64(((pte) | 0xfffULL))))
+
+/* Return number of PTEs to use for a given page size (expected power of 2) */
+#define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) - 12) % 9))
+
 /* IOTLB */
 #define AMDVI_IOTLB_MAX_SIZE 1024
 #define AMDVI_DEVID_SHIFT 36
-- 
2.43.5