On Mon, Sep 17, 2018 at 11:00:46AM -0500, Brijesh Singh wrote: > > > On 09/17/2018 08:49 AM, Eduardo Habkost wrote: > > Hi, > > > > I couldn't review the whole patch yet, but I have some comments > > below: > > > > On Fri, Sep 14, 2018 at 01:26:59PM -0500, Brijesh Singh wrote: > > > Register the interrupt remapping callback and read/write ops for the > > > amd-iommu-ir memory region. > > > > > > amd-iommu-ir is set to higher priority to ensure that this region won't > > > be masked out by other memory regions. > > > > > > While at it, add a overlapping amd-iommu region with higher priority > > > and update address space name to include the devfn. > > > > > > Cc: "Michael S. Tsirkin" <m...@redhat.com> > > > Cc: Paolo Bonzini <pbonz...@redhat.com> > > > Cc: Richard Henderson <r...@twiddle.net> > > > Cc: Eduardo Habkost <ehabk...@redhat.com> > > > Cc: Marcel Apfelbaum <marcel.apfelb...@gmail.com> > > > Cc: Tom Lendacky <thomas.lenda...@amd.com> > > > Cc: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com> > > > Signed-off-by: Brijesh Singh <brijesh.si...@amd.com> > > > --- > > > hw/i386/amd_iommu.c | 140 > > > ++++++++++++++++++++++++++++++++++++++++++++++++--- > > > hw/i386/amd_iommu.h | 17 ++++++- > > > hw/i386/trace-events | 5 ++ > > > 3 files changed, 154 insertions(+), 8 deletions(-) > > > > > > diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c > > > index 225825e..b15962b 100644 > > > --- a/hw/i386/amd_iommu.c > > > +++ b/hw/i386/amd_iommu.c > > > @@ -26,6 +26,7 @@ > > > #include "amd_iommu.h" > > > #include "qapi/error.h" > > > #include "qemu/error-report.h" > > > +#include "hw/i386/apic_internal.h" > > > #include "trace.h" > > > /* used AMD-Vi MMIO registers */ > > > @@ -56,6 +57,7 @@ struct AMDVIAddressSpace { > > > uint8_t devfn; /* device function > > > */ > > > AMDVIState *iommu_state; /* AMDVI - one per machine > > > */ > > > IOMMUMemoryRegion iommu; /* Device's address translation region > > > */ > > > + MemoryRegion root; /* AMDVI Root memory map region */ > > > MemoryRegion iommu_ir; /* Device's interrupt remapping region > > > */ > > > AddressSpace as; /* device's corresponding address space > > > */ > > > }; > > > @@ -1027,10 +1029,104 @@ static IOMMUTLBEntry > > > amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr, > > > return ret; > > > } > > > +/* Interrupt remapping for MSI/MSI-X entry */ > > > +static int amdvi_int_remap_msi(AMDVIState *iommu, > > > + MSIMessage *origin, > > > + MSIMessage *translated, > > > + uint16_t sid) > > > +{ > > > + assert(origin && translated); > > > + > > > + trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid); > > > + > > > + if (!iommu || !iommu->intr_enabled) { > > > + memcpy(translated, origin, sizeof(*origin)); > > > + goto out; > > > + } > > > + > > > + if (origin->address & AMDVI_MSI_ADDR_HI_MASK) { > > > + trace_amdvi_err("MSI address high 32 bits non-zero when " > > > + "Interrupt Remapping enabled."); > > > + return -AMDVI_IR_ERR; > > > + } > > > + > > > + if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != > > > APIC_DEFAULT_ADDRESS) { > > > + trace_amdvi_err("MSI is not from IOAPIC."); > > > + return -AMDVI_IR_ERR; > > > + } > > > + > > > +out: > > > + trace_amdvi_ir_remap_msi(origin->address, origin->data, > > > + translated->address, translated->data); > > > + return 0; > > > +} > > > + > > > +static int amdvi_int_remap(X86IOMMUState *iommu, > > > + MSIMessage *origin, > > > + MSIMessage *translated, > > > + uint16_t sid) > > > +{ > > > + return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin, > > > + translated, sid); > > > +} > > > + > > > +static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr, > > > + uint64_t value, unsigned size, > > > + MemTxAttrs attrs) > > > +{ > > > + int ret; > > > + MSIMessage from = { 0, 0 }, to = { 0, 0 }; > > > + uint16_t sid = AMDVI_IOAPIC_SB_DEVID; > > > + > > > + from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST; > > > + from.data = (uint32_t) value; > > > + > > > + trace_amdvi_mem_ir_write_req(addr, value, size); > > > + > > > + if (!attrs.unspecified) { > > > + /* We have explicit Source ID */ > > > + sid = attrs.requester_id; > > > + } > > > + > > > + ret = amdvi_int_remap_msi(opaque, &from, &to, sid); > > > + if (ret < 0) { > > > + /* TODO: report error */ > > > > How do you plan to address this TODO item? > > > > > + /* Drop the interrupt */ > > > > What does this comment mean? Is this also a TODO item? > > > As per the specs, if we are not able to remap the interrupts then we > should be log the events so that if needed guest OS can access the log > events and make some decisions. I have not implemented this yet. > I still need to understand how all these things works before > attempting to emulate this part of code. > > I have to see what can be done in addition to log to handle the > cases where we failed to remap. For now, I just added a comment so that > it reminds us to revisit it.
I see. Should we call error_report_once() so users can see a warning in case the guest is doing something that we don't support yet? It would be nice if the comment mentions that we're supposed to log the events. > > > > > > > + return MEMTX_ERROR; > > > + } > > > + > > > + apic_get_class()->send_msi(&to); > > > + > > > + trace_amdvi_mem_ir_write(to.address, to.data); > > > + return MEMTX_OK; > > > +} > > > + > > > +static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr, > > > + uint64_t *data, unsigned size, > > > + MemTxAttrs attrs) > > > +{ > > > + return MEMTX_OK; > > > +} > > > + > > > +static const MemoryRegionOps amdvi_ir_ops = { > > > + .read_with_attrs = amdvi_mem_ir_read, > > > + .write_with_attrs = amdvi_mem_ir_write, > > > + .endianness = DEVICE_LITTLE_ENDIAN, > > > + .impl = { > > > + .min_access_size = 4, > > > + .max_access_size = 4, > > > + }, > > > + .valid = { > > > + .min_access_size = 4, > > > + .max_access_size = 4, > > > + } > > > +}; > > > + > > > static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, > > > int devfn) > > > { > > > + char name[128]; > > > AMDVIState *s = opaque; > > > - AMDVIAddressSpace **iommu_as; > > > + AMDVIAddressSpace **iommu_as, *amdvi_dev_as; > > > int bus_num = pci_bus_num(bus); > > > iommu_as = s->address_spaces[bus_num]; > > > @@ -1043,19 +1139,46 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus > > > *bus, void *opaque, int devfn) > > > /* set up AMD-Vi region */ > > > if (!iommu_as[devfn]) { > > > + snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); > > > + > > > iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace)); > > > iommu_as[devfn]->bus_num = (uint8_t)bus_num; > > > iommu_as[devfn]->devfn = (uint8_t)devfn; > > > iommu_as[devfn]->iommu_state = s; > > > - memory_region_init_iommu(&iommu_as[devfn]->iommu, > > > - sizeof(iommu_as[devfn]->iommu), > > > + amdvi_dev_as = iommu_as[devfn]; > > > + > > > + /* > > > + * Memory region relationships looks like (Address range shows > > > + * only lower 32 bits to make it short in length...): > > > + * > > > + * |-----------------+-------------------+----------| > > > + * | Name | Address range | Priority | > > > + * |-----------------+-------------------+----------+ > > > + * | amdvi_root | 00000000-ffffffff | 0 | > > > + * | amdvi_iommu | 00000000-ffffffff | 1 | > > > + * | amdvi_iommu_ir | fee00000-feefffff | 64 | > > > + * |-----------------+-------------------+----------| > > > + */ > > > + memory_region_init_iommu(&amdvi_dev_as->iommu, > > > + sizeof(amdvi_dev_as->iommu), > > > > The change from iommu_as[devfn] to amdvi_dev_as makes this patch > > harder to review. Not a big deal, but if you introduce it in a > > separate patch you'll help reviewers. > > > > > > Sure, I can make this as a separate patch to help reviewers. > > > > > TYPE_AMD_IOMMU_MEMORY_REGION, > > > OBJECT(s), > > > "amd-iommu", UINT64_MAX); > > > - address_space_init(&iommu_as[devfn]->as, > > > - MEMORY_REGION(&iommu_as[devfn]->iommu), > > > - "amd-iommu"); > > > + memory_region_init(&amdvi_dev_as->root, OBJECT(s), > > > + "amdvi_root", UINT64_MAX); > > > + address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name); > > > > The old code simply used "amd-iommu" as the address space name, > > why did you decide to rename it? The commit message says you > > renamed it, but doesn't say why. > > > > The new name follows a different style: the old one used "-" > > and the new one uses "_". Why? > > > > > > > I was trying to be consistent with intel-iommu device -- which uses "_" > instead of "-" for region names. I can log this in commit message > in next rev. Consistency is probably a good justification. If that's documented in the commit message, it would be good enough to me. > > > > > > + memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s), > > > + &amdvi_ir_ops, s, "amd-iommu-ir", > > > + AMDVI_INT_ADDR_SIZE); > > > + memory_region_add_subregion_overlap(&amdvi_dev_as->root, > > > + AMDVI_INT_ADDR_FIRST, > > > + &amdvi_dev_as->iommu_ir, > > > + 64); > > > + memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, > > > + > > > MEMORY_REGION(&amdvi_dev_as->iommu), > > > + 1); > > > + > > > } > > > return &iommu_as[devfn]->as; > > > } > > > @@ -1173,6 +1296,10 @@ static void amdvi_realize(DeviceState *dev, Error > > > **err) > > > return; > > > } > > > + /* Pseudo address space under root PCI bus. */ > > > + pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, > > > AMDVI_IOAPIC_SB_DEVID); > > > + s->intr_enabled = x86_iommu->intr_supported; > > > + > > > /* set up MMIO */ > > > memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, > > > "amdvi-mmio", > > > AMDVI_MMIO_SIZE); > > > @@ -1206,6 +1333,7 @@ static void amdvi_class_init(ObjectClass *klass, > > > void* data) > > > dc->vmsd = &vmstate_amdvi; > > > dc->hotpluggable = false; > > > dc_class->realize = amdvi_realize; > > > + dc_class->int_remap = amdvi_int_remap; > > > /* Supported by the pc-q35-* machine types */ > > > dc->user_creatable = true; > > > } > > > diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h > > > index 8740305..71ff3c1 100644 > > > --- a/hw/i386/amd_iommu.h > > > +++ b/hw/i386/amd_iommu.h > > > @@ -206,8 +206,18 @@ > > > #define AMDVI_COMMAND_SIZE 16 > > > -#define AMDVI_INT_ADDR_FIRST 0xfee00000 > > > -#define AMDVI_INT_ADDR_LAST 0xfeefffff > > > +#define AMDVI_INT_ADDR_FIRST 0xfee00000 > > > +#define AMDVI_INT_ADDR_LAST 0xfeefffff > > > +#define AMDVI_INT_ADDR_SIZE (AMDVI_INT_ADDR_LAST - > > > AMDVI_INT_ADDR_FIRST + 1) > > > +#define AMDVI_MSI_ADDR_HI_MASK (0xffffffff00000000ULL) > > > +#define AMDVI_MSI_ADDR_LO_MASK (0x00000000ffffffffULL) > > > + > > > +/* SB IOAPIC is always on this device in AMD systems */ > > > +#define AMDVI_IOAPIC_SB_DEVID PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0)) > > > + > > > +/* Interrupt remapping errors */ > > > +#define AMDVI_IR_ERR 0x1 > > > + > > > #define TYPE_AMD_IOMMU_DEVICE "amd-iommu" > > > #define AMD_IOMMU_DEVICE(obj)\ > > > @@ -278,6 +288,9 @@ typedef struct AMDVIState { > > > /* IOTLB */ > > > GHashTable *iotlb; > > > + > > > + /* Interrupt remapping */ > > > + bool intr_enabled; > > > > Why do you need this field if the same info is already available > > at AMDVIState::iommu::intr_supported? > > > Again this is to be consistent with intel-iommu structure which has this > fields. Having said that I should be able to access the > AMDVIState::iommu::intr_supported and remove this new field. intel-iommu seems to need the field because intr_enabled can change at runtime at vtd_handle_gcmd_ire(). Does amd-iommu have anything equivalent? > > > > > > > > } AMDVIState; > > > #endif > > > diff --git a/hw/i386/trace-events b/hw/i386/trace-events > > > index 9e6fc4d..41d533c 100644 > > > --- a/hw/i386/trace-events > > > +++ b/hw/i386/trace-events > > > @@ -101,6 +101,11 @@ amdvi_mode_invalid(uint8_t level, uint64_t > > > addr)"error: translation level 0x%"PR > > > amdvi_page_fault(uint64_t addr) "error: page fault accessing guest > > > physical address 0x%"PRIx64 > > > amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, > > > uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa > > > 0x%"PRIx64 > > > amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, > > > uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa > > > 0x%"PRIx64 > > > +amdvi_mem_ir_write_req(uint64_t addr, uint64_t val, uint32_t size) "addr > > > 0x%"PRIx64" data 0x%"PRIx64" size 0x%"PRIx32 > > > +amdvi_mem_ir_write(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" data > > > 0x%"PRIx64 > > > +amdvi_ir_remap_msi_req(uint64_t addr, uint64_t data, uint8_t devid) > > > "addr 0x%"PRIx64" data 0x%"PRIx64" devid 0x%"PRIx8 > > > +amdvi_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, > > > uint64_t data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr > > > 0x%"PRIx64", data 0x%"PRIx64")" > > > +amdvi_err(const char *str) "%s" > > > # hw/i386/vmport.c > > > vmport_register(unsigned char command, void *func, void *opaque) > > > "command: 0x%02x func: %p opaque: %p" > > > -- > > > 2.7.4 > > > > > > > > -- Eduardo