On Wed, Sep 09, 2015 at 12:45:10PM +0500, Valentine Sinitsyn wrote: > On 09.09.2015 12:30, David kiarie wrote: > ...snip... > > >>>+static void amd_iommu_cmdbuf_exec(AMDIOMMUState *s) > >>>+{ > >>>+ unsigned type; > >>>+ uint8_t cmd[IOMMU_COMMAND_SIZE]; > >>>+ > >>>+ IOMMU_DPRINTF(COMMAND, ""); > >>>+ memset(cmd, 0, IOMMU_COMMAND_SIZE); > >>>+ > >>>+ if(dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head, > >>>cmd, IOMMU_COMMAND_SIZE)){ > >>>+ IOMMU_DPRINTF(COMMAND, "error: fail to access memory at > >>>0x%"PRIx64 > >>>+ " + %"PRIu8, s->cmdbuf, s->cmdbuf_head); > >>>+ } > >>>+ > >>>+ type = cmd[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT; > >>>+ > >>>+ switch(type){ > >>>+ case CMD_COMPLETION_WAIT: > >>>+ /* pretend to wait for command execution to complete */ > >>>+ IOMMU_DPRINTF(COMMAND, "completion wait requested"); > >>>+ amd_iommu_completion_wait(s, cmd); > >>>+ break; > >>>+ case CMD_INVAL_DEVTAB_ENTRY: > >>>+ /* Not implemented yet - should just clear device table > >>>contents */ > >>>+ IOMMU_DPRINTF(COMMAND, "device table entries invalidated"); > >> > >>Better make it clear that these commands are not implemented yet. An IOMMU > >>is hardly usable without invalidators. > > > >I think the "debug printf" here is misleading. AFAIK, the commands > >listed here (there are others which I haven't listed) relate to 'cache' > >which means these commands do nothing in the absence of cache. > > > >The reason why I advertise 'cache' is because the linux driver checks > >for cache and other optional features to determine the IOMMU version > >whereby versions 2 of IOMMU are assumed to support these features. > Did you check how address translation is emulated in QEMU? I didn't :) But > you return IOMMUTLBEntry from your translation routine, and I'd rather ensure > it is not cached in some other layer. > > Besides, omitting invalidation commands will be a problem on real hardware. 
> As IOMMU emulation is mainly useful for debugging, I'd reproduce this > behaviour in the emulator. I mean, if I forget to do CMD_INVAL_DEVTAB_ENTRY > in my guest code, things shouldn't work properly.
That's hardly a priority. IOMMU is useful for other things besides debugging. > You can just copy all data structures to AMDIOMMUState and use these "cached" > copies, updated when you run invalidation commands. But this isn't a #1 > priority, you are right. > > Can you share some test results for this code? > > > >> > >>>+ break; > >>>+ case CMD_INVAL_IOMMU_PAGES: > >>>+ /* invalidate IOMMU cache for a particular domain */ > >>>+ IOMMU_DPRINTF(COMMAND, "IOMMU pages invalidated"); > >>>+ break; > >>>+ case CMD_INVAL_IOTLB_PAGES: > >>>+ /* Only present if remote IOTLB present */ > >>>+ IOMMU_DPRINTF(COMMAND, "IOTLB pages invalidated"); > >>>+ break; > >>>+ case CMD_INVAL_INTR_TABLE: > >>>+ /* no support for interrupt remapping yet */ > >>>+ IOMMU_DPRINTF(COMMAND, "interrupt table invalidated"); > >>>+ break; > >>>+ default: > >>>+ IOMMU_DPRINTF(COMMAND, "unhandled command %d", type); > >>>+ break; > >>>+ } > >>>+ > >>>+} > >>>+ > >>>+static void amd_iommu_cmdbuf_run(AMDIOMMUState *s) > >>>+{ > >>>+ IOMMU_DPRINTF(COMMAND, ""); > >>>+ > >>>+ uint64_t *mmio_cmdbuf_head = (uint64_t*)s->mmior + MMIO_COMMAND_HEAD; > >>>+ > >>>+ if(!s->cmdbuf_enabled){ > >>>+ IOMMU_DPRINTF(COMMAND, "Command buffer not enabled"); > >>>+ return; > >>>+ } > >>>+ > >>>+ while(s->cmdbuf_head != s->cmdbuf_tail) { > >>>+ /* check if there is work to do. 
*/ > >>>+ IOMMU_DPRINTF(COMMAND, "COMMAND BUFFER head at %x COMMAND BUFFER > >>>tail at %x", > >>>+ s->cmdbuf_head, s->cmdbuf_tail); > >>>+ amd_iommu_cmdbuf_exec(s); > >>>+ s->cmdbuf_head += CMDBUF_ENTRY_SIZE; > >>>+ > >>>+ /* wrap head pointer */ > >>>+ if (s->cmdbuf_head >= s->cmdbuf_len * CMDBUF_ENTRY_SIZE) { > >>>+ s->cmdbuf_head = 0; > >>>+ } > >>>+ } > >>>+ > >>>+ *mmio_cmdbuf_head = cpu_to_le64(s->cmdbuf_head); > >>>+} > >>>+ > >>>+/* System Software might never read from some of this fields but anyways > >>>*/ > >>>+static uint64_t amd_iommu_mmio_read(void *opaque, hwaddr addr, unsigned > >>>size) > >>>+{ > >>>+ AMDIOMMUState *s = opaque; > >>>+ > >>>+ uint64_t val = -1; > >>>+ if(addr + size > MMIO_SIZE) { > >>>+ IOMMU_DPRINTF(MMIO, "error: addr outside region: max 0x%"PRIX64 > >>>+ ", got 0x%"PRIx64 " %d", > >>>+ (uint64_t)MMIO_SIZE, addr, size); > >>>+ return (uint64_t) - 1; > >>>+ } > >>>+ > >>>+ switch(addr & ~0x07){ > >>>+ case MMIO_DEVICE_TABLE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_DEVICE_TABLE read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >> > >>What makes these cases different, except debug printf()s? Maybe leave > >>printf()s inside the switch, and factor out the rest. This applies to other > >>similar cases as well. 
> >> > >> > >>>+ > >>>+ case MMIO_COMMAND_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_BASE read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_BASE read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_CONTROL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_CONTROL read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EXCL_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXCL_BASE read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EXCL_LIMIT: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXCL_LIMIT read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_HEAD: 
> >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_HEAD read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_TAIL read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_HEAD read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_TAIL read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_STATUS: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_STATUS read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EXT_FEATURES: > >>>+ IOMMU_DPRINTF(MMIO, 
"MMIO_EXT_FEATURES read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size == 2){ > >>>+ val = amd_iommu_readw(s, addr); > >>>+ } else if(size == 4){ > >>>+ val = amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size == 8){ > >>>+ val = amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ default: > >>>+ IOMMU_DPRINTF(MMIO, "UNHANDLED MMIO read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ } > >>>+ > >>>+ return val; > >>>+} > >>>+ > >>>+static void iommu_handle_control_write(AMDIOMMUState *s) > >>>+{ > >>>+ /* read whatever is already written incase software is writing in > >>>chucks less than 8 bytes */ > >>>+ unsigned long control = amd_iommu_readq(s, MMIO_CONTROL); > >>>+ s->enabled = !!(control & MMIO_CONTROL_IOMMUEN); > >>>+ > >>>+ s->ats_enabled = !!(control & MMIO_CONTROL_HTTUNEN); > >>>+ s->evtlog_enabled = s->enabled && !!(control & > >>>MMIO_CONTROL_EVENTLOGEN); > >>>+ > >>>+ s->evtlog_intr = !!(control & MMIO_CONTROL_EVENTINTEN); > >>>+ s->completion_wait_intr = !!(control & MMIO_CONTROL_COMWAITINTEN); > >>>+ s->cmdbuf_enabled = s->enabled && !!(control & > >>>MMIO_CONTROL_CMDBUFLEN); > >>>+ > >>>+ /* update the flags depending on the control register */ > >>>+ if(s->cmdbuf_enabled) { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) |= MMIO_STATUS_CMDBUF_RUN; > >>>+ } else { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) &= ~MMIO_STATUS_CMDBUF_RUN; > >>>+ } > >>>+ if (s->evtlog_enabled) { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) |= MMIO_STATUS_EVTLOG_RUN; > >>>+ } else { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) &= ~MMIO_STATUS_EVTLOG_RUN; > >>>+ } > >>>+ > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_STATUS state 0x%"PRIx64, control); > >>>+ > >>>+ amd_iommu_cmdbuf_run(s); > >> > >>Does any write to control register result in draining command buffer? To me, > >>it looks strange, and I'd expect IOMMU to execute commands only if I change > >>tail pointer. 
> >> > >> > >>>+} > >>>+ > >>>+/* FIXME: something might go wrong if System Software writes in chunks > >>>+ * of one byte but linux writes in chunks of 4 bytes so currently it > >>>+ * works correctly with linux but will definitely be busted if software > >>>+ * reads/writes 8 bytes > >>>+ */ > >>>+static void amd_iommu_mmio_write(void *opaque, hwaddr addr, uint64_t val, > >>>unsigned size) > >>>+{ > >>>+ > >>>+ AMDIOMMUState *s = opaque; > >>>+ unsigned long offset = addr & 0x07; > >>>+ > >>>+ if(addr + size > MMIO_SIZE) { > >>>+ IOMMU_DPRINTF(MMIO, "error: addr outside region: max 0x%"PRIx64 > >>>+ ", got 0x%"PRIx64 " %d", > >>>+ (uint64_t)MMIO_SIZE, addr, size); > >>>+ return; > >>>+ } > >>>+ > >>>+ switch(addr & ~0x07){ > >>>+ case MMIO_CONTROL: > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_CONTROL write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset 0x%"PRIx64 > >>>, addr, size, val, offset); > >>>+ iommu_handle_control_write(s); > >>>+ break; > >>>+ > >>>+ case MMIO_DEVICE_TABLE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_DEVICE_TABLE write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ /* set device table address > >>>+ * This also suffers from inability to tell whether software > >>>is done writing > >>>+ */ > >>>+ if(offset || (size == 8)){ > >>>+ unsigned long device_base = amd_iommu_readq(s, > >>>MMIO_DEVICE_TABLE); > >>>+ s->devtab = (dma_addr_t)(device_base & > >>>MMIO_CMDBUF_BASE_MASK); > >>>+ /* set device table length */ > >>>+ s->devtab_len = ((device_base & 
MMIO_DEVTAB_SIZE_MASK) + > >>>1) * (MMIO_DEVTAB_SIZE_UNIT / > >>>+ MMIO_DEVTAB_ENTRY_SIZE); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_HEAD write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ s->cmdbuf_head = val & MMIO_CMDBUF_HEAD_MASK; > >>>+ amd_iommu_cmdbuf_run(s); > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_BASE write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ /* FIXME - make sure System Software has finished writing > >>>incase it writes > >>>+ * in chucks less than 8 bytes in a robust way.As for now, > >>>this hacks works > >>>+ * for the linux driver > >>>+ */ > >>>+ if(offset || (size == 8)){ > >>>+ unsigned long command_base = amd_iommu_readq(s, > >>>MMIO_COMMAND_BASE); > >>>+ s->cmdbuf = (dma_addr_t)(command_base & > >>>MMIO_CMDBUF_BASE_MASK); > >>>+ s->cmdbuf_len = 1UL << (s->mmior[MMIO_CMDBUF_SIZE_BYTE] & > >>>MMIO_CMDBUF_SIZE_MASK); > >>>+ s->cmdbuf_head = s->cmdbuf_tail = 0; > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_TAIL write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ 
s->cmdbuf_tail = val & MMIO_CMDBUF_TAIL_MASK; > >>>+ amd_iommu_cmdbuf_run(s); > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_BASE write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ s->evtlog = (val & MMIO_EVTLOG_BASE_MASK); > >>>+ s->evtlog_len = 1UL << > >>>(*(uint64_t*)&s->mmior[MMIO_EVTLOG_SIZE_BYTE] & MMIO_EVTLOG_SIZE_MASK); > >>>+ break; > >>>+ > >>>+ case MMIO_EXCL_LIMIT: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXCL_LIMIT write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ s->excl_limit = (val & MMIO_EXCL_LIMIT_MASK) | > >>>MMIO_EXCL_LIMIT_LOW; > >>>+ break; > >>>+ //TODO : Event handling fixes > >>>+ > >>>+ case MMIO_EVENT_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_HEAD write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_TAIL write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ 
break; > >>>+ /* PPR log base - unused for now */ > >>>+ case MMIO_PPR_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_PPR_BASE write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ /* PPR log head - also unused for now */ > >>>+ case MMIO_PPR_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_PPR_HEAD write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ /* PPR log tail - unused for now */ > >>>+ case MMIO_PPR_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_PPR_TAIL write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size == 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size == 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size == 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ > >>>+ default: > >>>+ IOMMU_DPRINTF(MMIO, "UNHANDLED MMIO write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ ; > >>>+ } > >>>+ > >>>+} > >>>+ > >>>+static void amd_iommu_log_event(AMDIOMMUState *s, AMDIOMMUEvent *evt) > >>>+{ > >>>+ /* event logging not enabled */ > >>>+ if(!s->evtlog_enabled || *(uint64_t*)&s->mmior[MMIO_STATUS] | > >>>MMIO_STATUS_EVTLOG_OF){ > >>>+ return; > >>>+ } > >>>+ > >>>+ /* event log buffer full */ > >>>+ if(s->evtlog_tail >= s->evtlog_len) { > >>>+ *(uint64_t*)&s->mmior[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_OF; > >>>+ } > >>>+ > >>>+ 
if(dma_memory_write(&address_space_memory, s->evtlog_len + > >>>s->evtlog_tail, &evt, EVENT_LEN)){ > >>>+ //IOMMU_DPRINTF(ELOG, "error: fail to write at address 0%x"PRIu8, > >>>s->evtlog + s->evtlog_tail); > >>>+ } > >>>+ > >>>+ s->evtlog_tail += EVENT_LEN; > >>>+ *(uint64_t*)&s->mmior[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_INTR; > >>>+ /* Disable logging if Overflow ? */ > >> > >>No, the spec details event logging restart procedure. Make sure emulated > >>IOMMU behaves as described there. > >> > >> > >>>+} > >>>+ > >>>+static void amd_iommu_page_fault(AMDIOMMUState *s, int devfn, unsigned > >>>domid, dma_addr_t addr, int present, > >>>+ int is_write) > >>>+{ > >>>+ IOMMU_DPRINTF(ELOG, ""); > >>>+ > >>>+ AMDIOMMUEvent evt; > >>>+ unsigned info; > >>>+ uint16_t status; > >>>+ > >>>+ evt.devfn = cpu_to_le16(devfn); > >>>+ evt.reserved = 0; > >>>+ evt.domid = cpu_to_le16(domid); > >>>+ evt.addr = cpu_to_le16(addr); > >>>+ > >>>+ info = EVENT_IOPF; > >>>+ > >>>+ if(present) { > >>>+ info |= EVENT_IOPF_PR; > >>>+ } > >>>+ > >>>+ if (is_write) { > >>>+ info |= EVENT_IOPF_RW; > >>>+ } > >>>+ > >>>+ /* log a page fault */ > >>>+ amd_iommu_log_event(s, &evt); > >>>+ > >>>+ /* Abort the translation */ > >>>+ status = pci_get_word(s->dev.config + PCI_STATUS); > >>>+ pci_set_word(s->dev.config + PCI_STATUS, status | > >>>PCI_STATUS_SIG_TARGET_ABORT); > >>>+} > >>>+ > >>>+static inline uint64_t amd_iommu_get_perms(uint64_t entry) > >>>+{ > >>>+ return (entry &(DEV_PERM_READ | DEV_PERM_WRITE)) >> DEV_PERM_SHIFT; > >>>+} > >>>+ > >>>+/* FIXME deduplication */ > >>>+AddressSpace *bridge_host_amd_iommu(PCIBus *bus, void *opaque, int devfn) > >>>+{ > >>>+ AMDIOMMUState *s = opaque; > >>>+ AMDIOMMUAddressSpace **iommu_as; > >>>+ int bus_num = pci_bus_num(bus); > >>>+ > >>>+ /* just in case */ > >>>+ assert(0 <= bus_num && bus_num <= PCI_BUS_MAX); > >>>+ assert(0 <= devfn && devfn <= PCI_DEVFN_MAX); > >>>+ > >>>+ iommu_as = s->address_spaces[bus_num]; > >>>+ > >>>+ /* allocate memory 
during the first run */ > >>>+ if(!iommu_as) { > >>>+ iommu_as = > >>>g_malloc0(sizeof(AMDIOMMUAddressSpace*)*PCI_DEVFN_MAX); > >>>+ s->address_spaces[bus_num] = iommu_as; > >>>+ } > >>>+ > >>>+ /* set up IOMMU region */ > >>>+ if(!iommu_as[devfn]){ > >>>+ iommu_as[devfn] = g_malloc0(sizeof(AMDIOMMUAddressSpace)); > >>>+ iommu_as[devfn]->bus_num = (uint8_t)bus_num; > >>>+ iommu_as[devfn]->devfn = (uint8_t)devfn; > >>>+ iommu_as[devfn]->iommu_state = s; > >>>+ > >>>+ memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s), > >>>&s->iommu_ops, "amd-iommu", UINT64_MAX); > >>>+ address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu, > >>>"amd-iommu"); > >>>+ } > >>>+ return &iommu_as[devfn]->as; > >>>+} > >>>+ > >>>+/* validate a page table entry */ > >>>+static bool amd_iommu_validate_pte(uint64_t pte) > >>>+{ > >>>+ return pte & DEV_VALID && pte & DEV_TRANSLATION_VALID; > >> > >>I'd suggest parenthesis here. > >> > >> > >>>+} > >>>+ > >>>+/* get a device table entry given the device function */ > >>>+static bool amd_iommu_get_dte(AMDIOMMUState *s, int devfn, uint64_t > >>>*entry) > >>>+{ > >>>+ /* Could lead to out of range accesses */ > >>>+ assert(0 < devfn && devfn < PCI_DEVFN_MAX); > >>>+ > >>>+ uint32_t offset = devfn * DEVTAB_ENTRY_SIZE; > >>>+ if(dma_memory_read(&address_space_memory, s->devtab + offset, entry, > >>>32)){ > >>>+ IOMMU_DPRINTF(MMU, "error: fail to access Device Entry devtab > >>>0x%"PRIx64 > >>>+ "offset 0x%"PRIx32, s->devtab, offset); > >>>+ return false; > >>>+ } > >>>+ > >>>+ if(!amd_iommu_validate_pte(entry[0])){ > >>>+ IOMMU_DPRINTF(MMU, > >>>+ "Pte entry at 0x%"PRIx64" is invalid", entry[0]); > >>>+ return false; > >>>+ } > >>>+ > >>>+ return true; > >>>+} > >>>+ > >>>+static uint64_t get_pte_translation_mode(uint64_t pte) > >>>+{ > >>>+ uint64_t level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; > >>>+ > >>>+ if(level == 7){ > >>>+ /* reserved level - abort ? 
*/ > >>>+ IOMMU_DPRINTF(MMU, > >>>+ "Reserved translation level (7) detected"); > >>>+ return 0; > >>>+ } > >>>+ > >>>+ return level; > >>>+} > >>>+ > >>>+/* TODO : Mark addresses as Accessed and Dirty */ > >>>+static void amd_iommu_do_translate(AMDIOMMUAddressSpace *as, hwaddr addr, > >>>bool is_write, IOMMUTLBEntry *ret) > >>>+{ > >>>+ AMDIOMMUState *s = as->iommu_state; > >>>+ > >>>+ int present; > >>>+ dma_addr_t pte_addr; > >>>+ uint64_t entry[4], pte, pte_perms; > >>>+ unsigned level; > >>>+ unsigned perms; > >>>+ > >>>+ if(!amd_iommu_get_dte(s, as->devfn, entry)){ > >>>+ goto no_translation; > >>>+ } > >>>+ > >>>+ pte = entry[0]; > >>>+ > >>>+ /* > >>>+ * It's okay to check for either read or write permissions > >>>+ * even for memory maps, since we don't support R/W maps. > >>>+ */ > >>>+ perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ; > >>>+ > >>>+ if(!(level = get_pte_translation_mode(pte))){ > >>>+ goto no_translation; > >>>+ } > >>>+ > >>>+ while(level > 0){ > >>>+ /* > >>>+ * check permissions: the bitwise > >>>+ * implication perms -> entry_perms must be true. Pages must be > >>>present > >>>+ * and permissions on all levels must be similar > >>>+ */ > >>>+ pte_perms = amd_iommu_get_perms(pte); > >>>+ present = pte & 1; > >>>+ if(!present || perms != (perms & pte_perms)){ > >>>+ amd_iommu_page_fault(s, as->devfn, entry[1] & > >>>DEV_DOMID_ID_MASK, addr, present, > >>>+ !!(perms & IOMMU_PERM_WRITE)); > >>>+ return; > >>>+ } > >>>+ > >>>+ /* go to the next lower level */ > >>>+ pte_addr = pte & DEV_PT_ROOT_MASK; > >>>+ pte_addr += ((addr >> ( 9 * level)) & 0xff8); > >> > >>Does this work for six level page tables? The highest level has different > >>bit size there IIRC. 
> >> > >>>+ pte = ldq_phys(&address_space_memory, pte_addr); > >>>+ level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; > >>>+ } > >>>+ > >>>+ ret->iova = addr & IOMMU_PAGE_MASK_4K; > >>>+ ret->translated_addr = (pte & DEV_PT_ROOT_MASK) & IOMMU_PAGE_MASK_4K; > >>>+ ret->addr_mask = ~IOMMU_PAGE_MASK_4K; > >>>+ ret->perm = IOMMU_RW; > >>>+ return; > >>>+ > >>>+no_translation: > >>>+ ret->iova = addr; > >>>+ ret->translated_addr = addr & IOMMU_PAGE_MASK_4K; > >>>+ ret->addr_mask = ~IOMMU_PAGE_MASK_4K; > >>>+ ret->perm = IOMMU_RW; > >>>+ return; > >> > >>Are you sure these transactions needs to be passed through rather than > >>target-aborted? > >> > >>>+} > >>>+ > >>>+static IOMMUTLBEntry amd_iommu_translate(MemoryRegion *iommu, hwaddr > >>>addr, bool is_write) > >>>+{ > >>>+ > >>>+ IOMMU_DPRINTF(GENERAL, ""); > >>>+ > >>>+ AMDIOMMUAddressSpace *as = container_of(iommu, AMDIOMMUAddressSpace, > >>>iommu); > >>>+ AMDIOMMUState *s = as->iommu_state; > >>>+ > >>>+ IOMMUTLBEntry ret = { > >>>+ .target_as = &address_space_memory, > >>>+ .iova = addr, > >>>+ .translated_addr = 0, > >>>+ .addr_mask = ~(hwaddr)0, > >>>+ .perm = IOMMU_NONE, > >>>+ }; > >>>+ > >>>+ if(!s->enabled){ > >>>+ /* IOMMU disabled - corresponds to iommu=off not failure to > >>>provide any parameter */ > >>>+ ret.iova = addr & IOMMU_PAGE_MASK_4K; > >>>+ ret.translated_addr = addr & IOMMU_PAGE_MASK_4K; > >>>+ ret.addr_mask = ~IOMMU_PAGE_MASK_4K; > >>>+ ret.perm = IOMMU_RW; > >>>+ return ret; > >>>+ } > >>>+ > >>>+ amd_iommu_do_translate(as, addr, is_write, &ret); > >>>+ > >>>+ IOMMU_DPRINTF(MMU, > >>>+ "devid: %02x:%02x.%x gpa 0x%"PRIx64 " hpa 0x%"PRIx64, > >>>as->bus_num, > >>>+ PCI_SLOT(as->devfn), PCI_FUNC(as->devfn), addr, > >>>ret.translated_addr); > >>>+ > >>>+ return ret; > >>>+} > >>>+ > >>>+static const MemoryRegionOps mmio_mem_ops = { > >>>+ .read = amd_iommu_mmio_read, > >>>+ .write = amd_iommu_mmio_write, > >>>+ .endianness = DEVICE_LITTLE_ENDIAN, > >>>+ .valid = { > >>>+ .min_access_size = 1, > 
>>>+ .max_access_size = 8, > >>>+ .unaligned = false, > >>>+ }, > >>>+ > >>>+ .valid = { > >>>+ .min_access_size = 8, > >>>+ .max_access_size = 8, > >>>+ }, > >>>+}; > >>>+ > >>>+static void amd_iommu_set_misc_capab(uint32_t host_va, uint32_t host_pa, > >>>uint32_t guest_va, uint32_t *reg) > >>>+{ > >>>+ *reg = 0; > >> > >>Redundant line. > >> > >> > >>>+ > >>>+ *reg |= MAX_VA_ADDR | MAX_PH_ADDR | MAX_GVA_ADDR; > >>>+} > >>>+ > >>>+/* FIXME: The similar code in reset and init could be combined into a > >>>function */ > >>>+static void amd_iommu_reset(DeviceState *dev) > >>>+{ > >>>+ AMDIOMMUState *s = AMD_IOMMU_DEVICE(dev); > >>>+ > >>>+ uint32_t capab_header = CAPAB_FEATURES; > >>>+ uint32_t capab_base_addr_low = s->mmio.addr & ~(0xffff0000); > >>>+ uint32_t capab_base_addr_high = (s->mmio.addr & ~(0xffff)) >> 16; > >>>+ uint32_t capab_range = 0xff000000; > >>>+ uint32_t capab_misc = 0x0; > >>>+ > >>>+ /* reset MMIO */ > >>>+ memset(s->mmior, 0, MMIO_SIZE); > >>>+ amd_iommu_writeq(s, MMIO_EXT_FEATURES, EXT_FEATURES); > >>>+ /* TODO :Disable event logging and reset all buffers */ > >>>+ amd_iommu_set_misc_capab(MAX_PH_ADDR, MAX_VA_ADDR, MAX_GVA_ADDR, > >>>&capab_misc); > >>>+ > >>>+ /* reset IOMMU specific capabilities */ > >>>+ pci_set_long(s->dev.config + s->capab_offset, capab_header); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_BAR_LOW, > >>>capab_base_addr_low); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_BAR_HIGH, > >>>capab_base_addr_high); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_RANGE, > >>>capab_range); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_MISC, > >>>capab_misc); > >> > >> > >>Do we also need to reset MSI capability block? 
> >> > >> > >>>+ > >>>+ /* reset device ident */ > >>>+ pci_config_set_vendor_id(s->dev.config, PCI_VENDOR_ID_AMD); > >>>+ pci_config_set_device_id(s->dev.config, PCI_DEVICE_ID_RD890_IOMMU); > >>>+ pci_config_set_class(s->dev.config, 0x0806); > >>>+ pci_config_set_prog_interface(s->dev.config, 00); > >>>+} > >>>+ > >>>+static void amd_iommu_write_capab(PCIDevice *dev, uint32_t addr, uint32_t > >>>val, int len) > >>>+{ > >>>+ pci_default_write_config(dev, addr, val, len); > >>>+} > >>>+ > >>>+/* I honestly don't know how to reserve MMIO */ > >>>+static void amd_iommu_mmio_map(AMDIOMMUState *s, hwaddr addr) > >>>+{ > >>>+ if(s->mmio.addr == addr){ > >>>+ return; > >>>+ } > >>>+ > >>>+// if(s->mmio.addr != (hwaddr)-1) { > >>>+// memory_region_del_subregion(get_system_memory(), &s->mmio); > >>>+// } > >>>+ > >>>+ s->mmio.addr = addr; > >>>+ memory_region_add_subregion(get_system_memory(), addr, &s->mmio); > >>>+ > >>>+} > >>>+ > >>>+static int amd_iommu_pci_initfn(PCIDevice *dev) > >>>+{ > >>>+ AMDIOMMUState *s = container_of(dev, AMDIOMMUState, dev); > >>>+ uint32_t capab_header = CAPAB_FEATURES; > >>>+ uint32_t capab_base_addr_low = (s->mmio.addr & ~(0xffff0000)); > >>>+ uint32_t capab_base_addr_high = (s->mmio.addr & ~(0x0000ffff) >> 16); > >>>+ uint32_t capab_range = 0xff000000; > >>>+ uint32_t capab_misc = 0x0; > >>>+ > >>>+ memset(s->mmior, 0, MMIO_SIZE); > >>>+ amd_iommu_set_misc_capab(MAX_PH_ADDR, MAX_VA_ADDR, MAX_GVA_ADDR, > >>>&capab_misc); > >>>+ > >>>+ s->iommu_ops.translate = amd_iommu_translate; > >>>+ s->enabled = false; > >>>+ s->ats_enabled = false; > >>>+ s->cmdbuf_enabled = false; > >>>+ s->devtab_len = 0; > >>>+ s->cmdbuf_len = 0; > >>>+ s->cmdbuf_head = 0; > >>>+ s->cmdbuf_tail = 0; > >>>+ s->evtlog_head = 0; > >>>+ s->evtlog_tail = 0; > >>>+ s->excl_enabled = false; > >>>+ s->excl_allow = false; > >>>+ s->mmio_enabled = false; > >>>+ dev->config_write = amd_iommu_write_capab; > >>>+ > >>>+ size_t offset; > >>>+ /* Save offset for IVRS */ > >>>+ 
s->capab_offset = offset = pci_add_capability(dev, PCI_CAP_ID_SEC, 0, > >>>CAPAB_SIZE); > >>>+ pci_set_long(s->dev.config + offset, capab_header); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_BAR_LOW, > >>>capab_base_addr_low); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_BAR_HIGH, > >>>capab_base_addr_high); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_RANGE, capab_range); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_MISC, capab_misc); > >>>+ > >>>+ /* add msi and hypertransport capabilities */ > >>>+ pci_add_capability(dev, PCI_CAP_ID_MSI, 0, CAPAB_REG_SIZE); > >>>+ pci_add_capability(dev, PCI_CAP_ID_HT, 0, CAPAB_REG_SIZE); > >>>+ > >>>+ /* set up MMIO */ > >>>+ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, > >>>"amd-iommu", MMIO_SIZE); > >>>+ amd_iommu_mmio_map(s, BUS_AMD_IOMMU_ADDR); > >>>+ > >>>+ /* reset device ident */ > >>>+ pci_config_set_vendor_id(dev->config, PCI_VENDOR_ID_AMD); > >>>+ pci_config_set_device_id(dev->config, PCI_DEVICE_ID_RD890_IOMMU); > >>>+ > >>>+ /* may not be necessary */ > >>>+ pci_config_set_interrupt_pin(dev->config, 0x1a); > >>>+ > >>>+ return 0; > >>>+} > >>>+ > >>>+static void amd_iommu_realize(PCIDevice *dev, Error **error) > >>>+{ > >>>+ amd_iommu_pci_initfn(dev); > >>>+} > >>>+ > >>>+static const VMStateDescription vmstate_amd_iommu = { > >>>+ .name = "amd-iommu", > >>>+ .fields = (VMStateField[]) { > >>>+ VMSTATE_PCI_DEVICE(dev, AMDIOMMUState), > >>>+ VMSTATE_END_OF_LIST() > >>>+ } > >>>+}; > >>>+ > >>>+static Property amd_iommu_properties[] = { > >>>+ DEFINE_PROP_UINT32("version", AMDIOMMUState, version, 2), > >>>+ DEFINE_PROP_END_OF_LIST(), > >>>+}; > >>>+ > >>>+/* not used for now but will be when cache is added */ > >>>+static void amd_iommu_uninit(PCIDevice *dev) > >>>+{ > >>>+ return; > >>>+} > >>>+ > >>>+static void amd_iommu_class_init(ObjectClass *klass, void* data) > >>>+{ > >>>+ DeviceClass *dc = DEVICE_CLASS(klass); > >>>+ PCIDeviceClass *pci = PCI_DEVICE_CLASS(klass); > >>>+ 
> >>>+ pci->realize = amd_iommu_realize; > >>>+ pci->exit = amd_iommu_uninit; > >>>+ > >>>+ dc->reset = amd_iommu_reset; > >>>+ dc->vmsd = &vmstate_amd_iommu; > >>>+ dc->props = amd_iommu_properties; > >>>+} > >>>+ > >>>+static const TypeInfo amd_iommu = { > >>>+ .name = TYPE_AMD_IOMMU_DEVICE, > >>>+ .parent = TYPE_PCI_DEVICE, > >>>+ .instance_size = sizeof(AMDIOMMUState), > >>>+ .class_init = amd_iommu_class_init > >>>+}; > >>>+ > >>>+static void amd_iommu_register_types(void) > >>>+{ > >>>+ type_register_static(&amd_iommu); > >>>+} > >>>+ > >>>+type_init(amd_iommu_register_types); > >>>diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h > >>>new file mode 100644 > >>>index 0000000..90309eb > >>>--- /dev/null > >>>+++ b/hw/i386/amd_iommu.h > >>>@@ -0,0 +1,298 @@ > >>>+/* > >>>+ * QEMU emulation of an AMD IOMMU (AMD-Vi) > >>>+ * > >>>+ * Copyright (C) 2011 Eduard-Gabriel Mateanu > >>>+ * Copyright (C) 2015 David Kiarie, <david.kia...@hotmail.com> > >>>+ * > >>>+ * This program is free software; you can redistribute it and/or modify > >>>+ * it under the terms of the GNU General Public License as published by > >>>+ * the Free Software Foundation; either version 2 of the License, or > >>>+ * (at your option) any later version. > >>>+ > >>>+ * This program is distributed in the hope that it will be useful, > >>>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of > >>>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>>+ * GNU General Public License for more details. > >>>+ > >>>+ * You should have received a copy of the GNU General Public License > >>>along > >>>+ * with this program; if not, see <http://www.gnu.org/licenses/>. 
> >>>+ */ > >>>+ > >>>+#ifndef AMD_IOMMU_H_ > >>>+#define AMD_IOMMU_H_ > >>>+ > >>>+#include "hw/hw.h" > >>>+#include "hw/pci/pci.h" > >>>+#include "hw/sysbus.h" > >>>+#include "sysemu/dma.h" > >>>+#include "qemu/osdep.h" > >>>+#include "qemu/event_notifier.h" > >>>+ > >>>+/* Capability registers */ > >>>+#define CAPAB_HEADER 0x00 > >>>+#define CAPAB_REV_TYPE 0x02 > >>>+#define CAPAB_FLAGS 0x03 > >>>+#define CAPAB_BAR_LOW 0x04 > >>>+#define CAPAB_BAR_HIGH 0x08 > >>>+#define CAPAB_RANGE 0x0C > >>>+#define CAPAB_MISC 0x10 > >>>+#define CAPAB_MISC1 0x14 > >>>+ > >>>+#define CAPAB_SIZE 0x18 > >>>+#define CAPAB_REG_SIZE 0x04 > >>>+ > >>>+/* Capability header data */ > >>>+#define CAPAB_FLAT_EXT (1 << 28) > >>>+#define CAPAB_EFR_SUP (1 << 27) > >>>+#define CAPAB_FLAG_NPCACHE (1 << 26) > >>>+#define CAPAB_FLAG_HTTUNNEL (1 << 25) > >>>+#define CAPAB_FLAG_IOTLBSUP (1 << 24) > >>>+#define CAPAB_INIT_REV (1 << 19) > >>>+#define CAPAB_INIT_TYPE (3 << 16) > >>>+#define CAPAB_INIT_REV_TYPE (CAPAB_REV | CAPAB_TYPE) > >>>+#define CAPAB_INIT_FLAGS (CAPAB_FLAG_NPCACHE | > >>>CAPAB_FLAG_HTTUNNEL) > >>>+#define CAPAB_INIT_MISC ((64 << 15) | (48 << 8)) > >>>+#define CAPAB_BAR_MASK (~((1UL << 14) - 1)) > >>>+ > >>>+/* MMIO registers */ > >>>+#define MMIO_DEVICE_TABLE 0x0000 > >>>+#define MMIO_COMMAND_BASE 0x0008 > >>>+#define MMIO_EVENT_BASE 0x0010 > >>>+#define MMIO_CONTROL 0x0018 > >>>+#define MMIO_EXCL_BASE 0x0020 > >>>+#define MMIO_EXCL_LIMIT 0x0028 > >>>+#define MMIO_EXT_FEATURES 0x0030 > >>>+#define MMIO_COMMAND_HEAD 0x2000 > >>>+#define MMIO_COMMAND_TAIL 0x2008 > >>>+#define MMIO_EVENT_HEAD 0x2010 > >>>+#define MMIO_EVENT_TAIL 0x2018 > >>>+#define MMIO_STATUS 0x2020 > >>>+#define MMIO_PPR_BASE 0x0038 > >>>+#define MMIO_PPR_HEAD 0x2030 > >>>+#define MMIO_PPR_TAIL 0x2038 > >>>+ > >>>+#define MMIO_SIZE 0x4000 > >>>+ > >>>+#define MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) > >>>+#define MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & > >>>~MMIO_DEVTAB_SIZE_MASK) > >>>+#define 
MMIO_DEVTAB_ENTRY_SIZE 32 > >>>+#define MMIO_DEVTAB_SIZE_UNIT 4096 > >>>+ > >>>+#define MMIO_CMDBUF_SIZE_BYTE (MMIO_COMMAND_BASE + 7) > >>>+#define MMIO_CMDBUF_SIZE_MASK 0x0F > >>>+#define MMIO_CMDBUF_BASE_MASK MMIO_DEVTAB_BASE_MASK > >>>+#define MMIO_CMDBUF_DEFAULT_SIZE 8 > >>>+#define MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) > >>>+#define MMIO_CMDBUF_TAIL_MASK MMIO_EVTLOG_HEAD_MASK > >>>+ > >>>+#define MMIO_EVTLOG_SIZE_BYTE (MMIO_EVENT_BASE + 7) > >>>+#define MMIO_EVTLOG_SIZE_MASK MMIO_CMDBUF_SIZE_MASK > >>>+#define MMIO_EVTLOG_BASE_MASK MMIO_CMDBUF_BASE_MASK > >>>+#define MMIO_EVTLOG_DEFAULT_SIZE MMIO_CMDBUF_DEFAULT_SIZE > >>>+#define MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) > >>>+#define MMIO_EVTLOG_TAIL_MASK MMIO_EVTLOG_HEAD_MASK > >>>+ > >>>+#define MMIO_EXCL_BASE_MASK MMIO_DEVTAB_BASE_MASK > >>>+#define MMIO_EXCL_ENABLED_MASK (1ULL << 0) > >>>+#define MMIO_EXCL_ALLOW_MASK (1ULL << 1) > >>>+#define MMIO_EXCL_LIMIT_MASK MMIO_DEVTAB_BASE_MASK > >>>+#define MMIO_EXCL_LIMIT_LOW 0xFFF > >>>+ > >>>+#define MMIO_CONTROL_IOMMUEN (1ULL << 0) > >>>+#define MMIO_CONTROL_HTTUNEN (1ULL << 1) > >>>+#define MMIO_CONTROL_EVENTLOGEN (1ULL << 2) > >>>+#define MMIO_CONTROL_EVENTINTEN (1ULL << 3) > >>>+#define MMIO_CONTROL_COMWAITINTEN (1ULL << 4) > >>>+#define MMIO_CONTROL_CMDBUFLEN (1ULL << 12) > >>>+ > >>>+#define MMIO_STATUS_EVTLOG_OF (1ULL << 0) > >>>+#define MMIO_STATUS_EVTLOG_INTR (1ULL << 1) > >>>+#define MMIO_STATUS_COMWAIT_INTR (1ULL << 2) > >>>+#define MMIO_STATUS_EVTLOG_RUN (1ULL << 3) > >>>+#define MMIO_STATUS_CMDBUF_RUN (1ULL << 4) > >>>+ > >>>+#define CMDBUF_ID_BYTE 0x07 > >>>+#define CMDBUF_ID_RSHIFT 4 > >>>+#define CMDBUF_ENTRY_SIZE 0x10 > >>>+ > >>>+#define CMD_COMPLETION_WAIT 0x01 > >>>+#define CMD_INVAL_DEVTAB_ENTRY 0x02 > >>>+#define CMD_INVAL_IOMMU_PAGES 0x03 > >>>+#define CMD_INVAL_IOTLB_PAGES 0x04 > >>>+#define CMD_INVAL_INTR_TABLE 0x05 > >>>+ > >>>+#define DEVTAB_ENTRY_SIZE 32 > >>>+ > >>>+/* Device table entry bits 0:63 */ > 
>>>+#define DEV_VALID (1ULL << 0) > >>>+#define DEV_TRANSLATION_VALID (1ULL << 1) > >>>+#define DEV_MODE_MASK 0x7 > >>>+#define DEV_MODE_RSHIFT 9 > >>>+#define DEV_PT_ROOT_MASK 0xFFFFFFFFFF000 > >>>+#define DEV_PT_ROOT_RSHIFT 12 > >>>+#define DEV_PERM_SHIFT 61 > >>>+#define DEV_PERM_READ (1ULL << 61) > >>>+#define DEV_PERM_WRITE (1ULL << 62) > >>>+ > >>>+/* Device table entry bits 64:127 */ > >>>+#define DEV_DOMID_ID_MASK ((1ULL << 16) - 1) > >>>+#define DEV_IOTLB_SUPPORT (1ULL << 17) > >>>+#define DEV_SUPPRESS_PF (1ULL << 18) > >>>+#define DEV_SUPPRESS_ALL_PF (1ULL << 19) > >>>+#define DEV_IOCTL_MASK (~3) > >>>+#define DEV_IOCTL_RSHIFT 20 > >>>+#define DEV_IOCTL_DENY 0 > >>>+#define DEV_IOCTL_PASSTHROUGH 1 > >>>+#define DEV_IOCTL_TRANSLATE 2 > >>>+#define DEV_CACHE (1ULL << 37) > >>>+#define DEV_SNOOP_DISABLE (1ULL << 38) > >>>+#define DEV_EXCL (1ULL << 39) > >>>+ > >>>+/* Event codes and flags, as stored in the info field */ > >>>+#define EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 24) > >>>+#define EVENT_IOPF (0x2U << 24) > >>>+#define EVENT_IOPF_I (1U << 3) > >>>+#define EVENT_IOPF_PR (1U << 4) > >>>+#define EVENT_IOPF_RW (1U << 5) > >>>+#define EVENT_IOPF_PE (1U << 6) > >>>+#define EVENT_IOPF_RZ (1U << 7) > >>>+#define EVENT_IOPF_TR (1U << 8) > >>>+#define EVENT_DEV_TAB_HW_ERROR (0x3U << 24) > >>>+#define EVENT_PAGE_TAB_HW_ERROR (0x4U << 24) > >>>+#define EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 24) > >>>+#define EVENT_COMMAND_HW_ERROR (0x6U << 24) > >>>+#define EVENT_IOTLB_INV_TIMEOUT (0x7U << 24) > >>>+#define EVENT_INVALID_DEV_REQUEST (0x8U << 24) > >>>+ > >>>+#define EVENT_LEN 16 > >>>+ > >>>+#define IOMMU_PERM_READ (1 << 0) > >>>+#define IOMMU_PERM_WRITE (1 << 1) > >>>+#define IOMMU_PERM_RW (IOMMU_PERM_READ | IOMMU_PERM_WRITE) > >>>+ > >>>+/* AMD RD890 Chipset */ > >>>+#define PCI_DEVICE_ID_RD890_IOMMU 0x20 > >>>+ > >>>+#define PCI_CAP_ID_SEC 0xf /* IOMMU capability header > >>>register */ > >>>+#define PCI_CAP_ID_MMIO_LOW 0x0 /* MMIO base address low > >>>register 
*/ > >>>+#define PCI_CAP_ID_MMIO_HIGH 0x0 /* MMIO base address high > >>>register */ > >>>+#define PCI_CAP_ID_RANGE 0x0 /* Device range register > >>>*/ > >>>+#define PCI_CAP_ID_MISC 0x0 /* miscellaneous Information > >>>register 0 */ > >>>+#define PCI_CAP_ID_MISC1 0x0 /* miscellaneous Information > >>>register 1 */ > >>>+ > >>>+#define FEATURE_PREFETCH (1ULL<<0) > >>>+#define FEATURE_PPR (1ULL<<1) > >>>+#define FEATURE_NX (1ULL<<3) > >>>+#define FEATURE_GT (1ULL<<4) > >>>+#define FEATURE_IA (1ULL<<6) > >>>+#define FEATURE_GA (1ULL<<7) > >>>+#define FEATURE_HE (1ULL<<8) > >>>+#define FEATURE_PC (1ULL<<9) > >>>+ > >>>+/* IOMMU paging mode */ > >>>+#define GATS_MODE (6ULL << 12) > >>>+#define HATS_MODE (6ULL << 10) > >>>+/* PCI SIG constants */ > >>>+#define PCI_BUS_MAX 256 > >>>+#define PCI_SLOT_MAX 32 > >>>+#define PCI_FUNC_MAX 8 > >>>+#define PCI_DEVFN_MAX 256 > >>>+ > >>>+/* extended feature support */ > >>>+#define EXT_FEATURES (FEATURE_PREFETCH | FEATURE_PPR | FEATURE_NX | > >>>FEATURE_GT | FEATURE_IA | FEATURE_GA | FEATURE_HE | GATS_MODE | HATS_MODE ) > >>>+ > >>>+/* capabilities header */ > >>>+#define CAPAB_FEATURES (CAPAB_FLAT_EXT | CAPAB_FLAG_NPCACHE | > >>>CAPAB_FLAG_IOTLBSUP | PCI_CAP_ID_SEC | CAPAB_INIT_TYPE | > >>>CAPAB_FLAG_HTTUNNEL > >>>| CAPAB_EFR_SUP) > >>>+ > >>>+/* command constants */ > >>>+#define COM_STORE_ADDRESS_MASK 0xffffffffffff8 > >>>+#define COM_COMPLETION_STORE_MASK 0x1 > >>>+#define COM_COMPLETION_INTR 0x2 > >>>+#define COM_COMPLETION_DATA_OFF 0x8 > >>>+#define IOMMU_COMMAND_SIZE 0x10 > >>>+ > >>>+/* IOMMU default address */ > >>>+#define BUS_AMD_IOMMU_ADDR 0xfeb00000 > >>>+ > >>>+/* page management constants */ > >>>+#define IOMMU_PAGE_SHIFT 12 > >>>+#define IOMMU_PAGE_SIZE (1ULL << IOMMU_PAGE_SHIFT) > >>>+ > >>>+#define IOMMU_PAGE_SHIFT_4K 12 > >>>+#define IOMMU_PAGE_MASK_4K (~((1ULL << IOMMU_PAGE_SHIFT_4K) - 1)) > >>>+#define IOMMU_PAGE_SHIFT_2M 21 > >>>+#define IOMMU_PAGE_MASK_2M (~((1ULL << IOMMU_PAGE_SHIFT_2M) -1)) > >>>+#define 
IOMMU_PAGE_SHIFT_1G 30 > >>>+#define IOMMU_PAGE_MASK_1G (~((1ULL << IOMMU_PAGE_SHIFT_1G) - 1)) > >>>+ > >>>+#define TYPE_AMD_IOMMU_DEVICE "amd-iommu" > >>>+#define AMD_IOMMU_DEVICE(obj)\ > >>>+ OBJECT_CHECK(AMDIOMMUState, (obj), TYPE_AMD_IOMMU_DEVICE) > >>>+ > >>>+typedef struct AMDIOMMUState AMDIOMMUState; > >>>+ > >>>+typedef struct AMDIOMMUAddressSpace{ > >>>+ uint8_t bus_num; /* bus number > >>>*/ > >>>+ uint8_t devfn; /* device function > >>>*/ > >>>+ AMDIOMMUState *iommu_state; /* IOMMU - one per machine > >>>*/ > >>>+ MemoryRegion iommu; /* Device's iommu region > >>>*/ > >>>+ AddressSpace as; /* device's corresponding address space > >>>*/ > >>>+}AMDIOMMUAddressSpace; > >>>+ > >>>+struct AMDIOMMUState { > >>>+ PCIDevice dev; /* The PCI device itself */ > >>>+ > >>>+ uint32_t version; > >>>+ > >>>+ int capab_offset; /* capability offset pointer */ > >>>+ unsigned char *capab; /* capabilities registers */ > >>>+ > >>>+ bool enabled; /* IOMMU enabled */ > >>>+ bool ats_enabled; /* address translation enabled */ > >>>+ bool cmdbuf_enabled; /* command buffer enabled */ > >>>+ bool evtlog_enabled; /* event log enabled */ > >>>+ bool excl_enabled; > >>>+ > >>>+ dma_addr_t devtab; /* base address device table */ > >>>+ size_t devtab_len; /* device table length */ > >>>+ > >>>+ dma_addr_t cmdbuf; /* command buffer base address */ > >>>+ uint64_t cmdbuf_len; /* command buffer length */ > >>>+ unsigned cmdbuf_head; /* current IOMMU read position */ > >>>+ unsigned cmdbuf_tail; /* next Software write position */ > >>>+ int completion_wait_intr; > >>>+ > >>>+ dma_addr_t evtlog; /* base address event log */ > >>>+ size_t evtlog_intr; > >>>+ size_t evtlog_len; /* event log length */ > >>>+ size_t evtlog_head; /* event log head */ > >>>+ size_t evtlog_tail; /* event log tail */ > >>>+ > >>>+ /* unused for now */ > >>>+ dma_addr_t excl_base; /* base DVA - IOMMU exclusion range */ > >>>+ dma_addr_t excl_limit; /* limit of IOMMU exclusion range */ > >>>+ bool excl_allow; /* 
translate accesses to the exclusion > >>>range */ > >>>+ bool excl_enable; /* exclusion range enabled */ > >>>+ > >>>+ MemoryRegion mmio; /* MMIO region */ > >>>+ unsigned char mmior[MMIO_SIZE]; > >>>+ bool mmio_enabled; > >>>+ > >>>+ /* IOMMU function */ > >>>+ MemoryRegionIOMMUOps iommu_ops; > >>>+ > >>>+ /* for each served device */ > >>>+ AMDIOMMUAddressSpace **address_spaces[PCI_BUS_MAX]; > >>>+}; > >>>+ > >>>+typedef struct AMDIOMMUEvent { > >>>+ uint16_t devfn; > >>>+ uint16_t reserved; > >>>+ uint16_t domid; > >>>+ uint16_t info; > >>>+ uint16_t addr; > >>>+} __attribute__((packed)) AMDIOMMUEvent; > >>>+ > >>>+AddressSpace *bridge_host_amd_iommu(PCIBus *bus, void *opaque, int > >>>devfn); > >>>+ > >>>+#endif > >>> > >> > >>Regards, > >>Valentine > > -- > С уважением, > Синицын Валентин