From: "Guo Ren (Alibaba DAMO Academy)" <guo...@kernel.org> Current implementation is wrong when iohgatp != bare. The RISC-V IOMMU specification has defined that the PDT is based on GPA, not SPA. So this patch fixes the problem, making PDT walk correctly when the G-stage table walk is enabled.
Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation") Cc: qemu-sta...@nongnu.org Cc: Sebastien Boeuf <s...@rivosinc.com> Cc: Tomasz Jeznach <tjezn...@rivosinc.com> Signed-off-by: Guo Ren (Alibaba DAMO Academy) <guo...@kernel.org> --- hw/riscv/riscv-iommu.c | 148 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 2 deletions(-) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index 96a7fbdefcf3..c4dccc9e5c5d 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -866,6 +866,145 @@ static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, return true; } +/** + * pdt_memory_read: PDT wrapper of dma_memory_read. + * + * @s: IOMMU Device State + * @ctx: Device Translation Context with devid and pasid set + * nested: Add G-stage translation or not + * @addr: address within that address space + * @buf: buffer with the data transferred + * @len: length of the data transferred + * @attrs: memory transaction attributes + */ +static MemTxResult pdt_memory_read(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx, bool nested, + dma_addr_t addr, + void *buf, dma_addr_t len, + MemTxAttrs attrs) +{ + uint64_t gatp_mode, pte; + struct { + unsigned char step; + unsigned char levels; + unsigned char ptidxbits; + unsigned char ptesize; + } sc; + MemTxResult ret; + dma_addr_t base = addr; + + if (!nested) + goto out; + + /* G stages translation mode */ + gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + + /* G stages translation tables root pointer */ + base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); + + /* Start at step 0 */ + sc.step = 0; + + if (s->fctl & RISCV_IOMMU_FCTL_GXL) { + /* 32bit mode for GXL == 1 */ + switch (gatp_mode) { + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 2; + sc.ptidxbits = 10; + sc.ptesize = 4; + break; + default: + return MEMTX_ACCESS_ERROR; + } + } else { + /* 64bit mode for GXL == 0 */ + switch (gatp_mode) { + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 3; + sc.ptidxbits = 9; + sc.ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 4; + sc.ptidxbits = 9; + sc.ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: + if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) { + return MEMTX_ACCESS_ERROR; + } + sc.levels = 5; + sc.ptidxbits = 9; + sc.ptesize = 8; + break; + default: + return MEMTX_ACCESS_ERROR; + } + } + + do { + const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits; + const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits * + (sc.levels - 1 - sc.step); + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); + const dma_addr_t pte_addr = base + idx * sc.ptesize; + + /* Address range check before first level lookup */ + if (!sc.step) { + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; + if ((addr & va_mask) != addr) { + return MEMTX_ACCESS_ERROR; + } + } + + /* Read page table entry */ + if (sc.ptesize == 4) { + uint32_t pte32 = 0; + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs); + pte = pte32; + } else { + ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs); + } + if (ret != MEMTX_OK) + return ret; + + sc.step++; + hwaddr ppn = pte >> PTE_PPN_SHIFT; + + if (!(pte & PTE_V)) { + return MEMTX_ACCESS_ERROR; /* Invalid PTE */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { + return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + PTE_X */ + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { + return MEMTX_ACCESS_ERROR; /* Misaligned PPN */ + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ + } else { + /* Leaf PTE, translation completed. */ + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); + break; + } + + if (sc.step == sc.levels) { + return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */ + } + } while (1); + +out: + return dma_memory_read(s->target_as, base, buf, len, attrs); +} + /* * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk * @@ -884,6 +1023,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) const size_t dc_len = sizeof(dc) >> dc_fmt; int depth; uint64_t de; + bool pdt_nested = false; switch (mode) { case RISCV_IOMMU_DDTP_MODE_OFF: @@ -1029,6 +1169,10 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; } + /* Detect nested PDT walk */ + pdt_nested = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD) != + RISCV_IOMMU_DC_IOHGATP_MODE_BARE; + for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); @@ -1038,7 +1182,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) */ const int split = depth * 9 + 8; addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; - if (dma_memory_read(s->target_as, addr, &de, sizeof(de), + if (pdt_memory_read(s, ctx, pdt_nested, addr, &de, sizeof(de), MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; } @@ -1053,7 +1197,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) /* Leaf entry in PDT */ addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; - if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, + if (pdt_memory_read(s, ctx, pdt_nested, addr, &dc.ta, sizeof(uint64_t) * 2, MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; } -- 2.40.1