Hi, I couldn't review the whole patch yet, but I have some comments below:
On Fri, Sep 14, 2018 at 01:26:59PM -0500, Brijesh Singh wrote: > Register the interrupt remapping callback and read/write ops for the > amd-iommu-ir memory region. > > amd-iommu-ir is set to higher priority to ensure that this region won't > be masked out by other memory regions. > > While at it, add a overlapping amd-iommu region with higher priority > and update address space name to include the devfn. > > Cc: "Michael S. Tsirkin" <m...@redhat.com> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Cc: Richard Henderson <r...@twiddle.net> > Cc: Eduardo Habkost <ehabk...@redhat.com> > Cc: Marcel Apfelbaum <marcel.apfelb...@gmail.com> > Cc: Tom Lendacky <thomas.lenda...@amd.com> > Cc: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com> > Signed-off-by: Brijesh Singh <brijesh.si...@amd.com> > --- > hw/i386/amd_iommu.c | 140 > ++++++++++++++++++++++++++++++++++++++++++++++++--- > hw/i386/amd_iommu.h | 17 ++++++- > hw/i386/trace-events | 5 ++ > 3 files changed, 154 insertions(+), 8 deletions(-) > > diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c > index 225825e..b15962b 100644 > --- a/hw/i386/amd_iommu.c > +++ b/hw/i386/amd_iommu.c > @@ -26,6 +26,7 @@ > #include "amd_iommu.h" > #include "qapi/error.h" > #include "qemu/error-report.h" > +#include "hw/i386/apic_internal.h" > #include "trace.h" > > /* used AMD-Vi MMIO registers */ > @@ -56,6 +57,7 @@ struct AMDVIAddressSpace { > uint8_t devfn; /* device function */ > AMDVIState *iommu_state; /* AMDVI - one per machine */ > IOMMUMemoryRegion iommu; /* Device's address translation region */ > + MemoryRegion root; /* AMDVI Root memory map region */ > MemoryRegion iommu_ir; /* Device's interrupt remapping region */ > AddressSpace as; /* device's corresponding address space */ > }; > @@ -1027,10 +1029,104 @@ static IOMMUTLBEntry > amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr, > return ret; > } > > +/* Interrupt remapping for MSI/MSI-X entry */ > +static int amdvi_int_remap_msi(AMDVIState *iommu, > + MSIMessage *origin, > + MSIMessage *translated, > + uint16_t sid) > +{ > + assert(origin && translated); > + > + trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid); > + > + if (!iommu || !iommu->intr_enabled) { > + memcpy(translated, origin, sizeof(*origin)); > + goto out; > + } > + > + if (origin->address & AMDVI_MSI_ADDR_HI_MASK) { > + trace_amdvi_err("MSI address high 32 bits non-zero when " > + "Interrupt Remapping enabled."); > + return -AMDVI_IR_ERR; > + } > + > + if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) { > + trace_amdvi_err("MSI is not from IOAPIC."); > + return -AMDVI_IR_ERR; > + } > + > +out: > + trace_amdvi_ir_remap_msi(origin->address, origin->data, > + translated->address, translated->data); > + return 0; > +} > + > +static int amdvi_int_remap(X86IOMMUState *iommu, > + MSIMessage *origin, > + MSIMessage *translated, > + uint16_t sid) > +{ > + return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin, > + translated, sid); > +} > + > +static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr, > + uint64_t value, unsigned size, > + MemTxAttrs attrs) > +{ > + int ret; > + MSIMessage from = { 0, 0 }, to = { 0, 0 }; > + uint16_t sid = AMDVI_IOAPIC_SB_DEVID; > + > + from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST; > + from.data = (uint32_t) value; > + > + trace_amdvi_mem_ir_write_req(addr, value, size); > + > + if (!attrs.unspecified) { > + /* We have explicit Source ID */ > + sid = attrs.requester_id; > + } > + > + ret = amdvi_int_remap_msi(opaque, &from, &to, sid); > + if (ret < 0) { > + /* TODO: report error */ How do you plan to address this TODO item? > + /* Drop the interrupt */ What does this comment mean? Is this also a TODO item? > + return MEMTX_ERROR; > + } > + > + apic_get_class()->send_msi(&to); > + > + trace_amdvi_mem_ir_write(to.address, to.data); > + return MEMTX_OK; > +} > + > +static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr, > + uint64_t *data, unsigned size, > + MemTxAttrs attrs) > +{ > + return MEMTX_OK; > +} > + > +static const MemoryRegionOps amdvi_ir_ops = { > + .read_with_attrs = amdvi_mem_ir_read, > + .write_with_attrs = amdvi_mem_ir_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > + .impl = { > + .min_access_size = 4, > + .max_access_size = 4, > + }, > + .valid = { > + .min_access_size = 4, > + .max_access_size = 4, > + } > +}; > + > static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int > devfn) > { > + char name[128]; > AMDVIState *s = opaque; > - AMDVIAddressSpace **iommu_as; > + AMDVIAddressSpace **iommu_as, *amdvi_dev_as; > int bus_num = pci_bus_num(bus); > > iommu_as = s->address_spaces[bus_num]; > @@ -1043,19 +1139,46 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus > *bus, void *opaque, int devfn) > > /* set up AMD-Vi region */ > if (!iommu_as[devfn]) { > + snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); > + > iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace)); > iommu_as[devfn]->bus_num = (uint8_t)bus_num; > iommu_as[devfn]->devfn = (uint8_t)devfn; > iommu_as[devfn]->iommu_state = s; > > - memory_region_init_iommu(&iommu_as[devfn]->iommu, > - sizeof(iommu_as[devfn]->iommu), > + amdvi_dev_as = iommu_as[devfn]; > + > + /* > + * Memory region relationships looks like (Address range shows > + * only lower 32 bits to make it short in length...): > + * > + * |-----------------+-------------------+----------| > + * | Name | Address range | Priority | > + * |-----------------+-------------------+----------+ > + * | amdvi_root | 00000000-ffffffff | 0 | > + * | amdvi_iommu | 00000000-ffffffff | 1 | > + * | amdvi_iommu_ir | fee00000-feefffff | 64 | > + * |-----------------+-------------------+----------| > + */ > + memory_region_init_iommu(&amdvi_dev_as->iommu, > + sizeof(amdvi_dev_as->iommu), The change from iommu_as[devfn] to amdvi_dev_as makes this patch harder to review. Not a big deal, but if you introduce it in a separate patch you'll help reviewers. > TYPE_AMD_IOMMU_MEMORY_REGION, > OBJECT(s), > "amd-iommu", UINT64_MAX); > - address_space_init(&iommu_as[devfn]->as, > - MEMORY_REGION(&iommu_as[devfn]->iommu), > - "amd-iommu"); > + memory_region_init(&amdvi_dev_as->root, OBJECT(s), > + "amdvi_root", UINT64_MAX); > + address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name); The old code simply used "amd-iommu" as the address space name, why did you decide to rename it? The commit message says you renamed it, but doesn't say why. The new name follows a different style: the old one used "-" and the new one uses "_". Why? > + memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s), > + &amdvi_ir_ops, s, "amd-iommu-ir", > + AMDVI_INT_ADDR_SIZE); > + memory_region_add_subregion_overlap(&amdvi_dev_as->root, > + AMDVI_INT_ADDR_FIRST, > + &amdvi_dev_as->iommu_ir, > + 64); > + memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, > + > MEMORY_REGION(&amdvi_dev_as->iommu), > + 1); > + > } > return &iommu_as[devfn]->as; > } > @@ -1173,6 +1296,10 @@ static void amdvi_realize(DeviceState *dev, Error > **err) > return; > } > > + /* Pseudo address space under root PCI bus. */ > + pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); > + s->intr_enabled = x86_iommu->intr_supported; > + > /* set up MMIO */ > memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, > "amdvi-mmio", > AMDVI_MMIO_SIZE); > @@ -1206,6 +1333,7 @@ static void amdvi_class_init(ObjectClass *klass, void* > data) > dc->vmsd = &vmstate_amdvi; > dc->hotpluggable = false; > dc_class->realize = amdvi_realize; > + dc_class->int_remap = amdvi_int_remap; > /* Supported by the pc-q35-* machine types */ > dc->user_creatable = true; > } > diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h > index 8740305..71ff3c1 100644 > --- a/hw/i386/amd_iommu.h > +++ b/hw/i386/amd_iommu.h > @@ -206,8 +206,18 @@ > > #define AMDVI_COMMAND_SIZE 16 > > -#define AMDVI_INT_ADDR_FIRST 0xfee00000 > -#define AMDVI_INT_ADDR_LAST 0xfeefffff > +#define AMDVI_INT_ADDR_FIRST 0xfee00000 > +#define AMDVI_INT_ADDR_LAST 0xfeefffff > +#define AMDVI_INT_ADDR_SIZE (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST > + 1) > +#define AMDVI_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) > +#define AMDVI_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) > + > +/* SB IOAPIC is always on this device in AMD systems */ > +#define AMDVI_IOAPIC_SB_DEVID PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0)) > + > +/* Interrupt remapping errors */ > +#define AMDVI_IR_ERR 0x1 > + > > #define TYPE_AMD_IOMMU_DEVICE "amd-iommu" > #define AMD_IOMMU_DEVICE(obj)\ > @@ -278,6 +288,9 @@ typedef struct AMDVIState { > > /* IOTLB */ > GHashTable *iotlb; > + > + /* Interrupt remapping */ > + bool intr_enabled; Why do you need this field if the same info is already available at AMDVIState::iommu::intr_supported? > } AMDVIState; > > #endif > diff --git a/hw/i386/trace-events b/hw/i386/trace-events > index 9e6fc4d..41d533c 100644 > --- a/hw/i386/trace-events > +++ b/hw/i386/trace-events > @@ -101,6 +101,11 @@ amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: > translation level 0x%"PR > amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical > address 0x%"PRIx64 > amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, > uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 > amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t > addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 > +amdvi_mem_ir_write_req(uint64_t addr, uint64_t val, uint32_t size) "addr > 0x%"PRIx64" data 0x%"PRIx64" size 0x%"PRIx32 > +amdvi_mem_ir_write(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" data > 0x%"PRIx64 > +amdvi_ir_remap_msi_req(uint64_t addr, uint64_t data, uint8_t devid) "addr > 0x%"PRIx64" data 0x%"PRIx64" devid 0x%"PRIx8 > +amdvi_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t > data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr 0x%"PRIx64", data > 0x%"PRIx64")" > +amdvi_err(const char *str) "%s" > > # hw/i386/vmport.c > vmport_register(unsigned char command, void *func, void *opaque) "command: > 0x%02x func: %p opaque: %p" > -- > 2.7.4 > > -- Eduardo