On 09/03/2012 06:01, David Gibson wrote:
> This patch adds the basic infrastructure necessary to emulate an IOMMU
> visible to the guest.  The DMAContext structure is extended with
> information and a callback describing the translation, and the various
> DMA functions used by devices will now perform IOMMU translation using
> this callback.
>
> Cc: Michael S. Tsirkin <m...@redhat.com>
> Cc: Richard Henderson <r...@twiddle.net>
>
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munte...@linux360.ro>
> Signed-off-by: David Gibson <da...@gibson.dropbear.id.au>
> ---
>  configure     |   12 ++++
>  dma-helpers.c |  191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  dma.h         |  125 +++++++++++++++++++++++++++++++-------
>  3 files changed, 306 insertions(+), 22 deletions(-)
>
> diff --git a/configure b/configure
> index a5eb832..e6fba2f 100755
> --- a/configure
> +++ b/configure
> @@ -138,6 +138,7 @@ linux_aio=""
>  cap_ng=""
>  attr=""
>  libattr=""
> +iommu="yes"
>  xfs=""
>
>  vhost_net="no"
> @@ -784,6 +785,10 @@ for opt do
>    ;;
>    --enable-vhost-net) vhost_net="yes"
>    ;;
> +  --enable-iommu) iommu="yes"
> +  ;;
> +  --disable-iommu) iommu="no"
> +  ;;
This need not be a configure option.  Just enable it, or it will bitrot.

>    --disable-opengl) opengl="no"
>    ;;
>    --enable-opengl) opengl="yes"
> @@ -1085,6 +1090,8 @@ echo "  --enable-docs            enable documentation build"
>  echo "  --disable-docs           disable documentation build"
>  echo "  --disable-vhost-net      disable vhost-net acceleration support"
>  echo "  --enable-vhost-net       enable vhost-net acceleration support"
> +echo "  --disable-iommu          disable IOMMU emulation support"
> +echo "  --enable-iommu           enable IOMMU emulation support"
>  echo "  --enable-trace-backend=B Set trace backend"
>  echo "                           Available backends:" $("$source_path"/scripts/tracetool --list-backends)
>  echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
> @@ -2924,6 +2931,7 @@ echo "posix_madvise     $posix_madvise"
>  echo "uuid support      $uuid"
>  echo "libcap-ng support $cap_ng"
>  echo "vhost-net support $vhost_net"
> +echo "IOMMU support     $iommu"
>  echo "Trace backend     $trace_backend"
>  echo "Trace output file $trace_file-<pid>"
>  echo "spice support     $spice"
> @@ -3801,6 +3809,10 @@ if test "$target_softmmu" = "yes" -a \( \
>      echo "CONFIG_NEED_MMU=y" >> $config_target_mak
>  fi
>
> +if test "$iommu" = "yes" ; then
> +  echo "CONFIG_IOMMU=y" >> $config_host_mak
> +fi
> +
>  if test "$gprof" = "yes" ; then
>      echo "TARGET_GPROF=yes" >> $config_target_mak
>      if test "$target_linux_user" = "yes" ; then
> diff --git a/dma-helpers.c b/dma-helpers.c
> index 9dcfb2c..248575d 100644
> --- a/dma-helpers.c
> +++ b/dma-helpers.c
> @@ -10,6 +10,9 @@
>  #include "dma.h"
>  #include "block_int.h"
>  #include "trace.h"
> +#include "range.h"
> +
> +/* #define DEBUG_IOMMU */
>
>  void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
>  {
> @@ -255,3 +258,191 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
>  {
>      bdrv_acct_start(bs, cookie, sg->size, type);
>  }
> +
> +#ifdef CONFIG_IOMMU
> +bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
> +                        DMADirection dir)

No __ identifiers, please.  You could call these
dma_context_memory_{valid,read,write}.
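For illustration, the renamed declarations could look like this (just a
sketch; the signatures are the ones from the patch, and dma_context_ is
only one possible prefix):

/* Double-underscore identifiers are reserved for the C implementation,
 * hence the rename.  Candidate declarations for dma.h: */
bool dma_context_memory_valid(DMAContext *dma, dma_addr_t addr,
                              dma_addr_t len, DMADirection dir);
int dma_context_memory_rw(DMAContext *dma, dma_addr_t addr,
                          void *buf, dma_addr_t len, DMADirection dir);
int dma_context_memory_zero(DMAContext *dma, dma_addr_t addr,
                            dma_addr_t len);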
> +{
> +    target_phys_addr_t paddr, plen;
> +
> +#ifdef DEBUG_IOMMU
> +    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
> +            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
> +#endif
> +
> +    while (len) {
> +        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
> +            return false;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        len -= plen;
> +        addr += plen;
> +    }
> +
> +    return true;
> +}
> +
> +int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
> +                    void *buf, dma_addr_t len, DMADirection dir)
> +{
> +    target_phys_addr_t paddr, plen;
> +    int err;
> +
> +#ifdef DEBUG_IOMMU
> +    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
> +            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
> +#endif
> +
> +    while (len) {
> +        err = dma->translate(dma, addr, &paddr, &plen, dir);
> +        if (err) {
> +            return -1;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        cpu_physical_memory_rw(paddr, buf, plen,
> +                               dir == DMA_DIRECTION_FROM_DEVICE);
> +
> +        len -= plen;
> +        addr += plen;
> +        buf += plen;
> +    }
> +
> +    return 0;
> +}
> +
> +int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len)
> +{
> +    target_phys_addr_t paddr, plen;
> +    int err;
> +
> +#ifdef DEBUG_IOMMU
> +    fprintf(stderr, "dma_memory_zero context=%p addr=0x" DMA_ADDR_FMT
> +            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
> +#endif
> +
> +    while (len) {
> +        err = dma->translate(dma, addr, &paddr, &plen,
> +                             DMA_DIRECTION_FROM_DEVICE);
> +        if (err) {
> +            return err;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        cpu_physical_memory_zero(paddr, plen);
> +
> +        len -= plen;
> +        addr += plen;
> +    }
> +
> +    return 0;
> +}
> +
> +typedef struct DMAMemoryMap DMAMemoryMap;
> +struct DMAMemoryMap {
> +    dma_addr_t           addr;
> +    size_t               len;
> +    void                 *buf;
> +    DMAInvalidateMapFunc *invalidate;
> +    void                 *invalidate_opaque;
> +
> +    QLIST_ENTRY(DMAMemoryMap) list;
> +};
> +
> +void dma_context_init(DMAContext *dma, DMATranslateFunc fn)
> +{
> +#ifdef DEBUG_IOMMU
> +    fprintf(stderr, "dma_context_init(%p, %p)\n", dma, fn);
> +#endif
> +    dma->translate = fn;
> +    QLIST_INIT(&dma->memory_maps);
> +}
> +
> +void dma_invalidate_memory_range(DMAContext *dma,
> +                                 dma_addr_t addr, dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dma->memory_maps, list) {
> +        if (ranges_overlap(addr, len, map->addr, map->len)) {
> +            map->invalidate(map->invalidate_opaque);
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void *__dma_memory_map(DMAContext *dma, DMAInvalidateMapFunc *cb,
> +                       void *cb_opaque, dma_addr_t addr, dma_addr_t *len,
> +                       DMADirection dir)
> +{
> +    int err;
> +    target_phys_addr_t paddr, plen;
> +    void *buf;
> +
> +    plen = *len;
> +    err = dma->translate(dma, addr, &paddr, &plen, dir);
> +    if (err) {
> +        return NULL;
> +    }
> +
> +    /*
> +     * If this is true, the virtual region is contiguous,
> +     * but the translated physical region isn't. We just
> +     * clamp *len, much like cpu_physical_memory_map() does.
> +     */
> +    if (plen < *len) {
> +        *len = plen;
> +    }
> +
> +    buf = cpu_physical_memory_map(paddr, &plen,
> +                                  dir == DMA_DIRECTION_FROM_DEVICE);
> +    *len = plen;
> +
> +    /* We treat maps as remote TLBs to cope with stuff like AIO. */
> +    if (cb) {
> +        DMAMemoryMap *map;
> +
> +        map = g_malloc(sizeof(DMAMemoryMap));
> +        map->addr = addr;
> +        map->len = *len;
> +        map->buf = buf;
> +        map->invalidate = cb;
> +        map->invalidate_opaque = cb_opaque;
> +
> +        QLIST_INSERT_HEAD(&dma->memory_maps, map, list);
> +    }
> +
> +    return buf;
> +}
> +
> +void __dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
> +                        DMADirection dir, dma_addr_t access_len)
> +{
> +    DMAMemoryMap *map;
> +
> +    cpu_physical_memory_unmap(buffer, len,
> +                              dir == DMA_DIRECTION_FROM_DEVICE,
> +                              access_len);
> +
> +    QLIST_FOREACH(map, &dma->memory_maps, list) {
> +        if ((map->buf == buffer) && (map->len == len)) {
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +#endif /* CONFIG_IOMMU */
> diff --git a/dma.h b/dma.h
> index a66e3d7..ec06163 100644
> --- a/dma.h
> +++ b/dma.h
> @@ -15,6 +15,7 @@
>  #include "hw/hw.h"
>  #include "block.h"
>
> +typedef struct DMAContext DMAContext;
>  typedef struct ScatterGatherEntry ScatterGatherEntry;
>
>  typedef enum {
> @@ -31,30 +32,82 @@ struct QEMUSGList {
>  };
>
>  #if defined(TARGET_PHYS_ADDR_BITS)
> +
> +#ifdef CONFIG_IOMMU

... which means making dma_addr_t always 64-bit.  This way you can even
remove the new qdev-dma.c file.  Just make DEFINE_PROP_DMAADDR an alias
for DEFINE_PROP_HEX64.
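Something along these lines (a sketch, assuming dma_addr_t becomes
unconditionally 64-bit; DEFINE_PROP_HEX64 is the existing qdev macro for
a uint64_t property displayed in hex):

/* With dma_addr_t fixed at 64 bits, a DMA address property is just a
 * hex64 property under another name, so no separate qdev-dma.c is needed. */
#define DEFINE_PROP_DMAADDR(_n, _s, _f, _d) \
    DEFINE_PROP_HEX64(_n, _s, _f, _d)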
> +/*
> + * When an IOMMU is present, bus addresses become distinct from
> + * CPU/memory physical addresses and may be a different size.  Because
> + * the IOVA size depends more on the bus than on the platform, we more
> + * or less have to treat these as 64-bit always to cover all (or at
> + * least most) cases.
> + */
> +typedef uint64_t dma_addr_t;
> +
> +#define DMA_ADDR_BITS 64
> +#define DMA_ADDR_FMT "%" PRIx64
> +#else
>  typedef target_phys_addr_t dma_addr_t;
>
>  #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
>  #define DMA_ADDR_FMT TARGET_FMT_plx
> +#endif
> +
> +typedef int DMATranslateFunc(DMAContext *dma,
> +                             dma_addr_t addr,
> +                             target_phys_addr_t *paddr,
> +                             target_phys_addr_t *len,
> +                             DMADirection dir);
> +
> +typedef struct DMAContext {
> +#ifdef CONFIG_IOMMU
> +    DMATranslateFunc *translate;
> +    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
> +#endif
> +} DMAContext;
> +
> +#ifdef CONFIG_IOMMU
> +static inline bool dma_has_iommu(DMAContext *dma)
> +{
> +    return !!dma;
> +}
> +#else
> +static inline bool dma_has_iommu(DMAContext *dma)
> +{
> +    return false;
> +}
> +#endif
>
>  typedef void DMAInvalidateMapFunc(void *);
>
>  /* Checks that the given range of addresses is valid for DMA.  This is
>   * useful for certain cases, but usually you should just use
>   * dma_memory_{read,write}() and check for errors */
> -static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
> -                                    dma_addr_t len, DMADirection dir)
> +bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
> +                        DMADirection dir);
> +static inline bool dma_memory_valid(DMAContext *dma,
> +                                    dma_addr_t addr, dma_addr_t len,
> +                                    DMADirection dir)
>  {
> -    /* Stub version, with no iommu we assume all bus addresses are valid */
> -    return true;
> +    if (!dma_has_iommu(dma)) {
> +        return true;
> +    } else {
> +        return __dma_memory_valid(dma, addr, len, dir);
> +    }
>  }
>
> +int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
> +                    void *buf, dma_addr_t len, DMADirection dir);
>  static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
>                                  void *buf, dma_addr_t len, DMADirection dir)
>  {
> -    /* Stub version when we have no iommu support */
> -    cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
> -                           dir == DMA_DIRECTION_FROM_DEVICE);
> -    return 0;
> +    if (!dma_has_iommu(dma)) {
> +        /* Fast-path for no IOMMU */
> +        cpu_physical_memory_rw(addr, buf, len,
> +                               dir == DMA_DIRECTION_FROM_DEVICE);
> +        return 0;
> +    } else {
> +        return __dma_memory_rw(dma, addr, buf, len, dir);
> +    }
>  }
>
>  static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
> @@ -70,35 +123,55 @@ static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
>                              DMA_DIRECTION_FROM_DEVICE);
>  }
>
> +int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len);
>  static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
>                                    dma_addr_t len)
>  {
> -    /* Stub version when we have no iommu support */
> -    cpu_physical_memory_zero(addr, len);
> -    return 0;
> +    if (!dma_has_iommu(dma)) {
> +        /* Fast-path for no IOMMU */
> +        cpu_physical_memory_zero(addr, len);
> +        return 0;
> +    } else {
> +        return __dma_memory_zero(dma, addr, len);
> +    }
>  }
>
> +void *__dma_memory_map(DMAContext *dma,
> +                       DMAInvalidateMapFunc *cb, void *opaque,
> +                       dma_addr_t addr, dma_addr_t *len,
> +                       DMADirection dir);
>  static inline void *dma_memory_map(DMAContext *dma,
> -                                   DMAInvalidateMapFunc *cb, void *opaque,
> +                                   DMAInvalidateMapFunc *cb, void *cb_opaque,
>                                     dma_addr_t addr, dma_addr_t *len,
>                                     DMADirection dir)
>  {
> -    target_phys_addr_t xlen = *len;
> -    void *p;
> -
> -    p = cpu_physical_memory_map(addr, &xlen,
> -                                dir == DMA_DIRECTION_FROM_DEVICE);
> -    *len = xlen;
> -    return p;
> +    if (!dma_has_iommu(dma)) {
> +        target_phys_addr_t xlen = *len;
> +        void *p;
> +
> +        p = cpu_physical_memory_map(addr, &xlen,
> +                                    dir == DMA_DIRECTION_FROM_DEVICE);
> +        *len = xlen;
> +        return p;
> +    } else {
> +        return __dma_memory_map(dma, cb, cb_opaque, addr, len, dir);
> +    }
>  }
>
> +void __dma_memory_unmap(DMAContext *dma,
> +                        void *buffer, dma_addr_t len,
> +                        DMADirection dir, dma_addr_t access_len);
>  static inline void dma_memory_unmap(DMAContext *dma,
>                                      void *buffer, dma_addr_t len,
>                                      DMADirection dir, dma_addr_t access_len)
>  {
> -    return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
> -                                     dir == DMA_DIRECTION_FROM_DEVICE,
> -                                     access_len);
> +    if (!dma_has_iommu(dma)) {
> +        return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
> +                                         dir == DMA_DIRECTION_FROM_DEVICE,
> +                                         access_len);
> +    } else {
> +        __dma_memory_unmap(dma, buffer, len, dir, access_len);
> +    }
>  }
>
>  #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
> @@ -139,6 +212,14 @@ DEFINE_LDST_DMA(q, q, 64, be);
>
>  #undef DEFINE_LDST_DMA
>
> +#ifdef CONFIG_IOMMU
> +
> +void dma_context_init(DMAContext *dma, DMATranslateFunc fn);
> +void dma_invalidate_memory_range(DMAContext *dma,
> +                                 dma_addr_t addr, dma_addr_t len);

Should dma_invalidate_memory_range be changed to a no-op if dma == NULL?
No idea, just asking.

Paolo

> +
> +#endif /* CONFIG_IOMMU */
> +
>  struct ScatterGatherEntry {
>      dma_addr_t base;
>      dma_addr_t len;
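For reference, the NULL-tolerant variant being asked about might look
like this (a sketch only, not part of the patch; it also uses
QLIST_FOREACH_SAFE because entries are unlinked during the walk, and
g_free() to match the g_malloc() in the map path):

void dma_invalidate_memory_range(DMAContext *dma,
                                 dma_addr_t addr, dma_addr_t len)
{
    DMAMemoryMap *map, *next;

    /* No translation context means no cached maps to invalidate. */
    if (!dma_has_iommu(dma)) {
        return;
    }

    QLIST_FOREACH_SAFE(map, &dma->memory_maps, list, next) {
        if (ranges_overlap(addr, len, map->addr, map->len)) {
            map->invalidate(map->invalidate_opaque);
            QLIST_REMOVE(map, list);
            g_free(map);
        }
    }
}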