This patch adds the basic infrastructure necessary to emulate an IOMMU visible to the guest. The DMAContext structure is extended with information and a callback describing the translation, and the various DMA functions used by devices will now perform IOMMU translation using this callback.
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Richard Henderson <r...@twiddle.net>
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munte...@linux360.ro>
Signed-off-by: David Gibson <da...@gibson.dropbear.id.au>
---
 configure     |   12 ++++
 dma-helpers.c |  222 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dma.h         |  125 +++++++++++++++++++++++++++++++-------
 3 files changed, 337 insertions(+), 22 deletions(-)

diff --git a/configure b/configure
index a5eb832..e6fba2f 100755
--- a/configure
+++ b/configure
@@ -138,6 +138,7 @@ linux_aio=""
 cap_ng=""
 attr=""
 libattr=""
+iommu="yes"
 xfs=""
 
 vhost_net="no"
@@ -784,6 +785,10 @@ for opt do
   ;;
   --enable-vhost-net) vhost_net="yes"
   ;;
+  --enable-iommu) iommu="yes"
+  ;;
+  --disable-iommu) iommu="no"
+  ;;
   --disable-opengl) opengl="no"
   ;;
   --enable-opengl) opengl="yes"
@@ -1085,6 +1090,8 @@ echo " --enable-docs enable documentation build"
 echo " --disable-docs disable documentation build"
 echo " --disable-vhost-net disable vhost-net acceleration support"
 echo " --enable-vhost-net enable vhost-net acceleration support"
+echo " --disable-iommu disable IOMMU emulation support"
+echo " --enable-iommu enable IOMMU emulation support"
 echo " --enable-trace-backend=B Set trace backend"
 echo " Available backends:" $("$source_path"/scripts/tracetool --list-backends)
 echo " --with-trace-file=NAME Full PATH,NAME of file to store traces"
@@ -2924,6 +2931,7 @@ echo "posix_madvise $posix_madvise"
 echo "uuid support $uuid"
 echo "libcap-ng support $cap_ng"
 echo "vhost-net support $vhost_net"
+echo "IOMMU support $iommu"
 echo "Trace backend $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support $spice"
@@ -3801,6 +3809,10 @@ if test "$target_softmmu" = "yes" -a \( \
   echo "CONFIG_NEED_MMU=y" >> $config_target_mak
 fi
 
+if test "$iommu" = "yes" ; then
+  echo "CONFIG_IOMMU=y" >> $config_host_mak
+fi
+
 if test "$gprof" = "yes" ; then
   echo "TARGET_GPROF=yes" >> $config_target_mak
   if test "$target_linux_user" = "yes" ; then
diff --git a/dma-helpers.c b/dma-helpers.c
index 9dcfb2c..248575d 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -10,6 +10,9 @@
 #include "dma.h"
 #include "block_int.h"
 #include "trace.h"
+#include "range.h"
+
+/* #define DEBUG_IOMMU */
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
@@ -255,3 +258,222 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
+
+#ifdef CONFIG_IOMMU
+/*
+ * Check that the whole bus range [addr, addr + len) translates for a
+ * transfer in direction dir.  Returns false at the first failure.
+ */
+bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                        DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
+            return false;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return true;
+}
+
+/*
+ * Copy len bytes between buf and the bus range starting at addr, one
+ * translated piece at a time.  Returns 0 on success, or -1 as soon as
+ * a piece fails to translate; pieces already transferred are not
+ * rolled back.
+ */
+int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                    void *buf, dma_addr_t len, DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
+            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen, dir);
+        if (err) {
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+
+    return 0;
+}
+
+/*
+ * Zero len bytes of the bus range starting at addr.  Returns 0 on
+ * success, or the error reported by dma->translate().
+ */
+int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_zero context=%p addr=0x" DMA_ADDR_FMT
+            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen,
+                             DMA_DIRECTION_FROM_DEVICE);
+        if (err) {
+            return err;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_zero(paddr, plen);
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return 0;
+}
+
+/* A live mapping that registered an invalidation callback. */
+typedef struct DMAMemoryMap DMAMemoryMap;
+struct DMAMemoryMap {
+    dma_addr_t addr;
+    size_t len;
+    void *buf;
+    DMAInvalidateMapFunc *invalidate;
+    void *invalidate_opaque;
+
+    QLIST_ENTRY(DMAMemoryMap) list;
+};
+
+/* Set the translation callback of dma and empty its list of mappings. */
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn)
+{
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_context_init(%p, %p)\n", dma, fn);
+#endif
+    dma->translate = fn;
+    QLIST_INIT(&dma->memory_maps);
+}
+
+/*
+ * Run the invalidation callback of every tracked mapping overlapping
+ * [addr, addr + len), then stop tracking it.
+ */
+void dma_invalidate_memory_range(DMAContext *dma,
+                                 dma_addr_t addr, dma_addr_t len)
+{
+    DMAMemoryMap *map, *next;
+
+    /* _SAFE variant: entries are unlinked and freed while iterating. */
+    QLIST_FOREACH_SAFE(map, &dma->memory_maps, list, next) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            map->invalidate(map->invalidate_opaque);
+            QLIST_REMOVE(map, list);
+            g_free(map); /* allocated with g_malloc() */
+        }
+    }
+}
+
+/*
+ * Map the bus range starting at addr for direct access.  *len is updated
+ * to the length actually mapped, which may be less than requested.  If
+ * cb is non-NULL the mapping is tracked so dma_invalidate_memory_range()
+ * can call cb(cb_opaque) when it is invalidated.  Returns NULL if addr
+ * does not translate.
+ */
+void *__dma_memory_map(DMAContext *dma, DMAInvalidateMapFunc *cb,
+                       void *cb_opaque, dma_addr_t addr, dma_addr_t *len,
+                       DMADirection dir)
+{
+    int err;
+    target_phys_addr_t paddr, plen;
+    void *buf;
+
+    plen = *len;
+    err = dma->translate(dma, addr, &paddr, &plen, dir);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * The translation might be valid for a larger region than was
+     * requested, so never map more than the caller asked for.  A
+     * shorter translation is reported through *len after mapping.
+     */
+    if (plen > *len) {
+        plen = *len;
+    }
+
+    buf = cpu_physical_memory_map(paddr, &plen,
+                                  dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = plen;
+
+    /* We treat maps as remote TLBs to cope with stuff like AIO. */
+    if (cb && buf) { /* nothing to track if the map failed */
+        DMAMemoryMap *map;
+
+        map = g_malloc(sizeof(*map));
+        map->addr = addr;
+        map->len = *len;
+        map->buf = buf;
+        map->invalidate = cb;
+        map->invalidate_opaque = cb_opaque;
+
+        QLIST_INSERT_HEAD(&dma->memory_maps, map, list);
+    }
+
+    return buf;
+}
+
+/*
+ * Release a buffer obtained from __dma_memory_map() and stop tracking
+ * the mapping it belonged to.
+ */
+void __dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
+                        DMADirection dir, dma_addr_t access_len)
+{
+    DMAMemoryMap *map, *next;
+
+    cpu_physical_memory_unmap(buffer, len,
+                              dir == DMA_DIRECTION_FROM_DEVICE,
+                              access_len);
+
+    /* _SAFE variant: the matching entry is unlinked and freed in the loop. */
+    QLIST_FOREACH_SAFE(map, &dma->memory_maps, list, next) {
+        if ((map->buf == buffer) && (map->len == len)) {
+            QLIST_REMOVE(map, list);
+            g_free(map); /* allocated with g_malloc() */
+        }
+    }
+}
+#endif /* CONFIG_IOMMU */
diff --git a/dma.h b/dma.h
index a66e3d7..ec06163 100644
--- a/dma.h
+++ b/dma.h
@@ -15,6 +15,7 @@
 #include "hw/hw.h"
 #include "block.h"
 
+typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 typedef enum {
@@ -31,30 +32,82 @@ struct QEMUSGList {
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
+
+#ifdef CONFIG_IOMMU
+/*
+ * When an IOMMU is present, bus addresses become distinct from
+ * CPU/memory physical addresses and may be a different size. Because
+ * the IOVA size depends more on the bus than on the platform, we more
+ * or less have to treat these as 64-bit always to cover all (or at
+ * least most) cases.
+ */
+typedef uint64_t dma_addr_t;
+
+#define DMA_ADDR_BITS 64
+#define DMA_ADDR_FMT "%" PRIx64
+#else
 typedef target_phys_addr_t dma_addr_t;
 
 #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
+#endif
+
+typedef int DMATranslateFunc(DMAContext *dma,
+                             dma_addr_t addr,
+                             target_phys_addr_t *paddr,
+                             target_phys_addr_t *len,
+                             DMADirection dir);
+
+struct DMAContext { /* typedef'd at the top of this header */
+#ifdef CONFIG_IOMMU
+    DMATranslateFunc *translate;
+    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
+#endif
+};
+
+#ifdef CONFIG_IOMMU
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return !!dma; /* a NULL context means "no IOMMU" */
+}
+#else
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return false;
+}
+#endif
 
 typedef void DMAInvalidateMapFunc(void *);
 
 /* Checks that the given range of addresses is valid for DMA. This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
-                                    dma_addr_t len, DMADirection dir)
+bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                        DMADirection dir);
+static inline bool dma_memory_valid(DMAContext *dma,
+                                    dma_addr_t addr, dma_addr_t len,
+                                    DMADirection dir)
 {
-    /* Stub version, with no iommu we assume all bus addresses are valid */
-    return true;
+    if (!dma_has_iommu(dma)) {
+        return true;
+    } else {
+        return __dma_memory_valid(dma, addr, len, dir);
+    }
 }
 
+int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                    void *buf, dma_addr_t len, DMADirection dir);
 static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
                                 void *buf, dma_addr_t len, DMADirection dir)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
-                           dir == DMA_DIRECTION_FROM_DEVICE);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_rw(addr, buf, len,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+        return 0;
+    } else {
+        return __dma_memory_rw(dma, addr, buf, len, dir);
+    }
 }
 
 static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
@@ -70,35 +123,55 @@ static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
                          DMA_DIRECTION_FROM_DEVICE);
 }
 
+int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len);
 static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
                                   dma_addr_t len)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_zero(addr, len);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_zero(addr, len);
+        return 0;
+    } else {
+        return __dma_memory_zero(dma, addr, len);
+    }
 }
 
+void *__dma_memory_map(DMAContext *dma,
+                       DMAInvalidateMapFunc *cb, void *cb_opaque,
+                       dma_addr_t addr, dma_addr_t *len,
+                       DMADirection dir);
 static inline void *dma_memory_map(DMAContext *dma,
-                                   DMAInvalidateMapFunc *cb, void *opaque,
+                                   DMAInvalidateMapFunc *cb, void *cb_opaque,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
-    target_phys_addr_t xlen = *len;
-    void *p;
-
-    p = cpu_physical_memory_map(addr, &xlen,
-                                dir == DMA_DIRECTION_FROM_DEVICE);
-    *len = xlen;
-    return p;
+    if (!dma_has_iommu(dma)) {
+        target_phys_addr_t xlen = *len;
+        void *p;
+
+        p = cpu_physical_memory_map(addr, &xlen,
+                                    dir == DMA_DIRECTION_FROM_DEVICE);
+        *len = xlen;
+        return p;
+    } else {
+        return __dma_memory_map(dma, cb, cb_opaque, addr, len, dir);
+    }
 }
 
+void __dma_memory_unmap(DMAContext *dma,
+                        void *buffer, dma_addr_t len,
+                        DMADirection dir, dma_addr_t access_len);
 static inline void dma_memory_unmap(DMAContext *dma,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
-                                     dir == DMA_DIRECTION_FROM_DEVICE,
-                                     access_len);
+    if (!dma_has_iommu(dma)) {
+        cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
+                                  dir == DMA_DIRECTION_FROM_DEVICE,
+                                  access_len);
+    } else {
+        __dma_memory_unmap(dma, buffer, len, dir, access_len);
+    }
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
@@ -139,6 +212,14 @@ DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
+#ifdef CONFIG_IOMMU
+
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn);
+void dma_invalidate_memory_range(DMAContext *dma,
+                                 dma_addr_t addr, dma_addr_t len);
+
+#endif /* CONFIG_IOMMU */
+
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
-- 
1.7.9.1