On Mon, Apr 11, 2016 at 05:19:23PM +0800, Peter Xu wrote: > This patch enables interrupt remapping for PCI devices. > > To play the trick, one memory region "iommu_ir" is added as child region > of the original iommu memory region, covering range 0xfeeXXXXX (which is > the address range for APIC). All the writes to this range will be taken > as MSI, and translation is carried out when IR is enabled. > > The translation process tried to make full use of the helper function > from IOAPIC patch. Several more bits are added to VTDIrq as MSI specific > fields. > > Signed-off-by: Peter Xu <pet...@redhat.com>
So far so good, but I wonder how we are going to support irqfd with this approach. It would be nicer to have a single module that somehow handles both ioapic and non ioapic. > --- > hw/i386/intel_iommu.c | 172 > ++++++++++++++++++++++++++++++++++++++++- > hw/i386/intel_iommu_internal.h | 2 + > include/hw/i386/intel_iommu.h | 34 ++++++++ > include/hw/pci/msi.h | 4 + > 4 files changed, 211 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 1dcdc7b..322b5ac 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -44,6 +44,10 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | > VTD_DBGBIT(CSR); > #define VTD_DPRINTF(what, fmt, ...) do {} while (0) > #endif > > +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, > + MSIMessage *origin, > + MSIMessage *translated); > + > static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, > uint64_t wmask, uint64_t w1cmask) > { don't forward-declare static functions, just arrange them sensibly. > @@ -1964,12 +1968,70 @@ static const MemoryRegionOps vtd_mem_ops = { > }, > }; > > +static uint64_t vtd_mem_ir_read(void *opaque, hwaddr addr, unsigned size) > +{ > + uint64_t data = 0; > + > + addr += VTD_INTERRUPT_ADDR_FIRST; > + > + VTD_DPRINTF(IR, "read mem_ir addr 0x%"PRIx64 " size %u", > + addr, size); > + > + if (dma_memory_read(&address_space_memory, addr, &data, size)) { > + VTD_DPRINTF(GENERAL, "error: fail to access 0x%"PRIx64, addr); > + return (uint32_t) -1; > + } > + > + return data; > +} > + > +static void vtd_mem_ir_write(void *opaque, hwaddr addr, > + uint64_t value, unsigned size) > +{ > + int ret = 0; > + MSIMessage from = {0}, to = {0}; > + > + from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST; > + from.data = (uint32_t) value; > + > + ret = vtd_interrupt_remap_msi(opaque, &from, &to); > + if (ret) { > + /* TODO: report error */ > + VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64 > + " data 0x%"PRIx32, from.address, from.data); > + /* Drop this interrupt */ > + return; > + } > + > + VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32, > + to.address, to.data); > + > + if (dma_memory_write(&address_space_memory, to.address, > + &to.data, size)) { > + VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64 > + " value 0x%"PRIx32, to.address, to.data); > + } > +} > + > +static const MemoryRegionOps vtd_mem_ir_ops = { > + .read = vtd_mem_ir_read, > + .write = vtd_mem_ir_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > + .impl = { > + .min_access_size = 4, > + .max_access_size = 4, > + }, > + .valid = { > + .min_access_size = 4, > + .max_access_size = 4, > + }, > +}; > + > static Property vtd_properties[] = { > DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), > DEFINE_PROP_END_OF_LIST(), > }; > > - > VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) > { > uintptr_t key = (uintptr_t)bus; > @@ -1995,6 +2057,11 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, > PCIBus *bus, int devfn) > vtd_dev_as->context_cache_entry.context_cache_gen = 0; > memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), > &s->iommu_ops, "intel_iommu", UINT64_MAX); > + memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s), > + &vtd_mem_ir_ops, s, "intel_iommu_ir", > + VTD_INTERRUPT_ADDR_SIZE); > + memory_region_add_subregion(&vtd_dev_as->iommu, > VTD_INTERRUPT_ADDR_FIRST, > + &vtd_dev_as->iommu_ir); > address_space_init(&vtd_dev_as->as, > &vtd_dev_as->iommu, "intel_iommu"); > } > @@ -2075,6 +2142,7 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, > uint16_t index, VTDIrq *irq > irq->dest = (irte.dest_id & VTD_IR_APIC_DEST_MASK) >> \ > VTD_IR_APIC_DEST_SHIFT; > irq->dest_mode = irte.dest_mode; > + irq->redir_hint = irte.redir_hint; > > VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," > "deliver:%u,dest:%u,dest_mode:%u", index, > @@ -2141,6 +2209,108 @@ int vtd_interrupt_remap_ioapic(IntelIOMMUState *iommu, > return 0; > } > > +/* Generate one MSI message from VTDIrq info */ > +static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) > +{ > + VTD_MSIMessage msg; > + > + bzero(&msg, sizeof(msg)); > + Just msg = {} will do it. > + /* Generate address bits */ > + msg.dest_mode = irq->dest_mode; > + msg.redir_hint = irq->redir_hint; > + msg.dest = irq->dest; > + msg.__addr_head = 0xfee; > + /* Keep this from original MSI address bits */ > + msg.__not_used = irq->msi_addr_last_bits; > + > + /* Generate data bits */ > + msg.vector = irq->vector; > + msg.delivery_mode = irq->delivery_mode; > + /* FIXME: assert always for level mode interrupt */ what does this mean? > + msg.level = 1; > + msg.trigger_mode = irq->trigger_mode; > + > + msg_out->address = msg.msi_addr; > + msg_out->data = msg.msi_data; > +} > + > +/* Interrupt remapping for MSI/MSI-X entry */ > +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, > + MSIMessage *origin, > + MSIMessage *translated) > +{ > + int ret = 0; > + VTD_IR_MSIAddress addr; > + uint16_t index = 0; > + VTDIrq irq = {0}; > + > + assert(iommu && origin && translated); > + > + if (!iommu->intr_enabled) { > + memcpy(translated, origin, sizeof(*origin)); > + return 0; > + } > + > + if (origin->data) { > + VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for " > + "interrupt remappable entry: 0x%"PRIx32, > + origin->data); > + return -VTD_FR_IR_REQ_RSVD; > + } > + > + if (origin->address & MSI_ADDR_HI_MASK) { > + VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" > + " during interrupt remapping: 0x%"PRIx32, > + (uint32_t)((origin->address & MSI_ADDR_HI_MASK) >> \ > + MSI_ADDR_HI_SHIFT)); > + return -VTD_FR_IR_REQ_RSVD; > + } > + > + addr.data = origin->address & MSI_ADDR_LO_MASK; > + if (addr.__head != 0xfee) { > + VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " > + "0x%"PRIx32, addr.data); > + return -VTD_FR_IR_REQ_RSVD; > + } > + > + if (addr.sub_valid != 1) { > + VTD_DPRINTF(GENERAL, "error: SHV not set for MSI addr: " > + "0x%"PRIx32, addr.data); > + return -VTD_FR_IR_REQ_RSVD; > + } > + > + /* TODO: Currently we still do not support compatible mode */ > + if (addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { > + VTD_DPRINTF(GENERAL, "error: trying to remap MSI entry" > + " with compatible format: 0x%"PRIx32, > + addr.data); > + return -VTD_FR_IR_REQ_COMPAT; > + } > + > + index = addr.index_h << 15 | addr.index_l; > + > + ret = vtd_remap_irq_get(iommu, index, &irq); > + if (ret) { > + return ret; > + } > + > + /* > + * We'd better keep the last two bits, assuming that guest OS > + * might modify it. Keep it does not hurt after all. > + */ > + irq.msi_addr_last_bits = addr.__not_care; > + > + /* Translate VTDIrq to MSI message */ > + vtd_generate_msi_message(&irq, translated); > + > + VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> " > + "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data, > + translated->address, translated->data); > + > + return 0; > +} > + > /* Do the initialization. It will also be called when reset, so pay > * attention when adding new initialization stuff. > */ > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > index 2a9987f..e1a08cb 100644 > --- a/hw/i386/intel_iommu_internal.h > +++ b/hw/i386/intel_iommu_internal.h > @@ -110,6 +110,8 @@ > /* Interrupt Address Range */ > #define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL > #define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL > +#define VTD_INTERRUPT_ADDR_SIZE (VTD_INTERRUPT_ADDR_LAST - \ > + VTD_INTERRUPT_ADDR_FIRST + 1) > > /* The shift of source_id in the key of IOTLB hash table */ > #define VTD_IOTLB_SID_SHIFT 36 > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index 6a79207..9297eba 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -24,6 +24,7 @@ > #include "hw/qdev.h" > #include "sysemu/dma.h" > #include "hw/i386/ioapic.h" > +#include "hw/pci/msi.h" > > #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" > #define INTEL_IOMMU_DEVICE(obj) \ > @@ -57,6 +58,7 @@ typedef union VTD_IRTE VTD_IRTE; > typedef union VTD_IR_IOAPICEntry VTD_IR_IOAPICEntry; > typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; > typedef struct VTDIrq VTDIrq; > +typedef struct VTD_MSIMessage VTD_MSIMessage; > > /* Context-Entry */ > struct VTDContextEntry { > @@ -77,6 +79,7 @@ struct VTDAddressSpace { > uint8_t devfn; > AddressSpace as; > MemoryRegion iommu; > + MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ > IntelIOMMUState *iommu_state; > VTDContextCacheEntry context_cache_entry; > }; > @@ -154,11 +157,42 @@ union VTD_IR_MSIAddress { > > /* Generic IRQ entry information */ > struct VTDIrq { > + /* Used by both IOAPIC/MSI interrupt remapping */ > uint8_t trigger_mode; > uint8_t vector; > uint8_t delivery_mode; > uint32_t dest; > uint8_t dest_mode; > + > + /* only used by MSI interrupt remapping */ > + uint8_t redir_hint; > + uint8_t msi_addr_last_bits; > +}; > + > +struct VTD_MSIMessage { > + union { > + struct { > + uint16_t __not_used:2; > + uint16_t dest_mode:1; > + uint16_t redir_hint:1; > + uint16_t __reserved:8; > + uint16_t dest:8; > + uint16_t __addr_head:12; /* 0xfee */ > + uint32_t __addr_hi:32; > + } QEMU_PACKED; > + uint64_t msi_addr; > + }; > + union { > + struct { > + uint16_t vector:8; > + uint16_t delivery_mode:3; > + uint16_t __resved:3; > + uint16_t level:1; > + uint16_t trigger_mode:1; > + uint16_t __resved1:16; > + } QEMU_PACKED; > + uint32_t msi_data; > + }; > }; > > /* When IR is enabled, all MSI/MSI-X data bits should be zero */ > diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h > index 8124908..8e94fca 100644 > --- a/include/hw/pci/msi.h > +++ b/include/hw/pci/msi.h > @@ -24,6 +24,10 @@ > #include "qemu-common.h" > #include "hw/pci/pci.h" > > +#define MSI_ADDR_HI_MASK (0xffffffff00000000ULL) > +#define MSI_ADDR_HI_SHIFT (32) > +#define MSI_ADDR_LO_MASK (0x00000000ffffffffULL) > + I don't think these are needed in msi.h, rename them with vtd prefix and put them where they are used. > struct MSIMessage { > uint64_t address; > uint32_t data; > -- > 2.4.3