This patch adds the basic infrastructure necessary to emulate an IOMMU visible to the guest. The DMAContext structure is extended with a translation callback and a list of the memory maps currently outstanding, and the various DMA functions used by devices now translate bus addresses through this callback whenever a DMAContext is supplied; with a NULL context they keep using the address directly as a guest physical address.
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Richard Henderson <r...@twiddle.net>
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munte...@linux360.ro>
Signed-off-by: David Gibson <da...@gibson.dropbear.id.au>
---
Review fixes folded into this revision (blob ids in the "index" lines are
those of the original posting):
 - iommu_dma_memory_unmap(): return once the matching map has been released
   instead of walking on through the freed node and always reaching
   assert(false); release with g_free() to pair with g_malloc().
 - iommu_dma_memory_map(): do not track a map when cpu_physical_memory_map()
   returns NULL.
 - iommu_wait_for_invalidated_maps(): re-check the count in a loop around
   qemu_cond_wait() and destroy the condition variable afterwards.
 - dma.h: define struct DMAContext without repeating the typedef; no
   "return <expr>" in the void dma_memory_unmap().

 dma-helpers.c |  250 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dma.h         |  108 ++++++++++++++++++++++-------
 hw/qdev-dma.h |    4 +-
 3 files changed, 335 insertions(+), 27 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 2dc4691..09591ef 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -9,6 +9,10 @@
 
 #include "dma.h"
 #include "trace.h"
+#include "range.h"
+#include "qemu-thread.h"
+
+/* #define DEBUG_IOMMU */
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
@@ -244,3 +248,249 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
+
+/*
+ * Walk [addr, addr + len) one translation at a time and report whether
+ * dma->translate() accepts every piece for the given direction.
+ */
+bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                            DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
+            return false;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return true;
+}
+
+/*
+ * Copy len bytes between buf and the translated range, one translation
+ * at a time.  Returns 0 on success, -1 if any translation fails.
+ */
+int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                        void *buf, dma_addr_t len, DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
+            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen, dir);
+        if (err) {
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+
+    return 0;
+}
+
+/*
+ * Zero len bytes of the translated range.  Returns 0 on success or the
+ * error code from dma->translate().
+ */
+int iommu_dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_zero context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen,
+                             DMA_DIRECTION_FROM_DEVICE);
+        if (err) {
+            return err;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_zero(paddr, plen);
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return 0;
+}
+
+/* Per-invalidation state: number of overlapping maps not yet unmapped. */
+typedef struct {
+    unsigned long count;
+    QemuCond cond;
+} DMAInvalidationState;
+
+/* One outstanding iommu_dma_memory_map() result, linked into the context. */
+typedef struct DMAMemoryMap DMAMemoryMap;
+struct DMAMemoryMap {
+    dma_addr_t addr;
+    size_t len;
+    void *buf;
+
+    DMAInvalidationState *invalidate;
+    QLIST_ENTRY(DMAMemoryMap) list;
+};
+
+/* Install the translation callback and start with no tracked maps. */
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn)
+{
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_context_init(%p, %p)\n", dma, fn);
+#endif
+    dma->translate = fn;
+    QLIST_INIT(&dma->memory_maps);
+}
+
+/*
+ * Translate addr and map the result for direct access.  *len is clamped to
+ * what could actually be mapped.  Returns NULL if translation or mapping
+ * fails; successful maps are tracked until iommu_dma_memory_unmap().
+ */
+void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
+                           DMADirection dir)
+{
+    int err;
+    target_phys_addr_t paddr, plen;
+    void *buf;
+    DMAMemoryMap *map;
+
+    plen = *len;
+    err = dma->translate(dma, addr, &paddr, &plen, dir);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * If this is true, the virtual region is contiguous,
+     * but the translated physical region isn't. We just
+     * clamp *len, much like cpu_physical_memory_map() does.
+     */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    buf = cpu_physical_memory_map(paddr, &plen,
+                                  dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = plen;
+    if (!buf) {
+        /* Nothing was mapped, so there is nothing to track or unmap. */
+        return NULL;
+    }
+
+    /* We treat maps as remote TLBs to cope with stuff like AIO. */
+    map = g_malloc(sizeof(*map));
+    map->addr = addr;
+    map->len = *len;
+    map->buf = buf;
+    map->invalidate = NULL;
+
+    QLIST_INSERT_HEAD(&dma->memory_maps, map, list);
+
+    return buf;
+}
+
+/*
+ * Unmap a buffer from iommu_dma_memory_map() and drop its tracking entry.
+ * If the entry was marked by iommu_wait_for_invalidated_maps(), the waiter
+ * is signalled once the last marked entry is gone.
+ */
+void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
+                            DMADirection dir, dma_addr_t access_len)
+{
+    DMAMemoryMap *map;
+
+    cpu_physical_memory_unmap(buffer, len,
+                              dir == DMA_DIRECTION_FROM_DEVICE,
+                              access_len);
+
+    QLIST_FOREACH(map, &dma->memory_maps, list) {
+        if ((map->buf == buffer) && (map->len == len)) {
+            QLIST_REMOVE(map, list);
+
+            if (map->invalidate) {
+                /* If this mapping was invalidated */
+                if (--map->invalidate->count == 0) {
+                    /* And we're the last mapping invalidated at the time */
+                    /* Then wake up whoever was waiting for the
+                     * invalidation to complete */
+                    qemu_cond_signal(&map->invalidate->cond);
+                }
+            }
+
+            g_free(map);
+            return;
+        }
+    }
+
+
+    /* unmap called on a buffer that wasn't mapped */
+    assert(false);
+}
+
+extern QemuMutex qemu_global_mutex;
+
+/*
+ * Mark every tracked map overlapping [addr, addr + len) and block on
+ * qemu_global_mutex until all of them have been unmapped.
+ */
+void iommu_wait_for_invalidated_maps(DMAContext *dma,
+                                     dma_addr_t addr, dma_addr_t len)
+{
+    DMAMemoryMap *map;
+    DMAInvalidationState is;
+
+    is.count = 0;
+    qemu_cond_init(&is.cond);
+
+    QLIST_FOREACH(map, &dma->memory_maps, list) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            is.count++;
+            map->invalidate = &is;
+        }
+    }
+
+    /* Loop: a condition wait may return without a matching signal. */
+    while (is.count) {
+        qemu_cond_wait(&is.cond, &qemu_global_mutex);
+    }
+    assert(is.count == 0);
+    qemu_cond_destroy(&is.cond);
+}
diff --git a/dma.h b/dma.h
index 876aea4..b57d72f 100644
--- a/dma.h
+++ b/dma.h
@@ -14,6 +14,7 @@
 #include "hw/hw.h"
 #include "block.h"
 
+typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 typedef enum {
@@ -30,28 +31,64 @@ struct QEMUSGList {
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
-typedef target_phys_addr_t dma_addr_t;
 
-#define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
-#define DMA_ADDR_FMT TARGET_FMT_plx
+/*
+ * When an IOMMU is present, bus addresses become distinct from
+ * CPU/memory physical addresses and may be a different size. Because
+ * the IOVA size depends more on the bus than on the platform, we more
+ * or less have to treat these as 64-bit always to cover all (or at
+ * least most) cases.
+ */
+typedef uint64_t dma_addr_t;
+
+#define DMA_ADDR_BITS 64
+#define DMA_ADDR_FMT "%" PRIx64
+
+typedef int DMATranslateFunc(DMAContext *dma,
+                             dma_addr_t addr,
+                             target_phys_addr_t *paddr,
+                             target_phys_addr_t *len,
+                             DMADirection dir);
+
+struct DMAContext {
+    DMATranslateFunc *translate;
+    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
+};
+
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return !!dma;
+}
 
 /* Checks that the given range of addresses is valid for DMA. This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
-                                    dma_addr_t len, DMADirection dir)
+bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                            DMADirection dir);
+static inline bool dma_memory_valid(DMAContext *dma,
+                                    dma_addr_t addr, dma_addr_t len,
+                                    DMADirection dir)
 {
-    /* Stub version, with no iommu we assume all bus addresses are valid */
-    return true;
+    if (!dma_has_iommu(dma)) {
+        return true;
+    } else {
+        return iommu_dma_memory_valid(dma, addr, len, dir);
+    }
 }
 
+int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                        void *buf, dma_addr_t len, DMADirection dir);
 static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
                                 void *buf, dma_addr_t len, DMADirection dir)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
-                           dir == DMA_DIRECTION_FROM_DEVICE);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_rw(addr, buf, len,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+        return 0;
+    } else {
+        return iommu_dma_memory_rw(dma, addr, buf, len, dir);
+    }
 }
 
 static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
@@ -67,34 +104,53 @@ static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
                          DMA_DIRECTION_FROM_DEVICE);
 }
 
+int iommu_dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len);
 static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
                                   dma_addr_t len)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_zero(addr, len);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_zero(addr, len);
+        return 0;
+    } else {
+        return iommu_dma_memory_zero(dma, addr, len);
+    }
 }
 
+void *iommu_dma_memory_map(DMAContext *dma,
+                           dma_addr_t addr, dma_addr_t *len,
+                           DMADirection dir);
 static inline void *dma_memory_map(DMAContext *dma,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
-    target_phys_addr_t xlen = *len;
-    void *p;
-
-    p = cpu_physical_memory_map(addr, &xlen,
-                                dir == DMA_DIRECTION_FROM_DEVICE);
-    *len = xlen;
-    return p;
+    if (!dma_has_iommu(dma)) {
+        target_phys_addr_t xlen = *len;
+        void *p;
+
+        p = cpu_physical_memory_map(addr, &xlen,
+                                    dir == DMA_DIRECTION_FROM_DEVICE);
+        *len = xlen;
+        return p;
+    } else {
+        return iommu_dma_memory_map(dma, addr, len, dir);
+    }
 }
 
+void iommu_dma_memory_unmap(DMAContext *dma,
+                            void *buffer, dma_addr_t len,
+                            DMADirection dir, dma_addr_t access_len);
 static inline void dma_memory_unmap(DMAContext *dma,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
-                                     dir == DMA_DIRECTION_FROM_DEVICE,
-                                     access_len);
+    if (!dma_has_iommu(dma)) {
+        cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
+                                  dir == DMA_DIRECTION_FROM_DEVICE,
+                                  access_len);
+    } else {
+        iommu_dma_memory_unmap(dma, buffer, len, dir, access_len);
+    }
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
@@ -135,6 +191,10 @@ DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn);
+void iommu_wait_for_invalidated_maps(DMAContext *dma,
+                                     dma_addr_t addr, dma_addr_t len);
+
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
diff --git a/hw/qdev-dma.h b/hw/qdev-dma.h
index f0ff558..6812735 100644
--- a/hw/qdev-dma.h
+++ b/hw/qdev-dma.h
@@ -6,7 +6,5 @@
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  */
-#include "qdev-addr.h"
-
 #define DEFINE_PROP_DMAADDR(_n, _s, _f, _d) \
-    DEFINE_PROP_TADDR(_n, _s, _f, _d)
+    DEFINE_PROP_HEX64(_n, _s, _f, _d)
-- 
1.7.9.5