This patch introduces support for device passthrough from the host to a paravirtualized guest.
A new command-line option, -passthrough, is added. Its argument has the form name/bus:dev.fn-irq. For example, to pass through an Ethernet device at PCI bus:dev.fn 04:08.0 with host IRQ 18, use: -passthrough Ethernet/04:08.0-18 The host driver must be unbound from the device before it is passed through. Signed-off-by: Amit Shah <[EMAIL PROTECTED]> --- qemu/Makefile | 6 +- qemu/Makefile.target | 4 +- qemu/exec.c | 1 + qemu/hw/apic.c | 2 + qemu/hw/passthrough/neo_pci_tree.h | 44 +++ qemu/hw/passthrough/passthrough.c | 604 ++++++++++++++++++++++++++++++++++++ qemu/hw/passthrough/passthrough.h | 64 ++++ qemu/hw/pc.c | 3 + qemu/hw/pci.c | 5 + qemu/hw/piix_pci.c | 6 + qemu/vl.c | 6 + tools/pci_barsize.c | 53 ++++ tools/pci_mmio.c | 82 +++++ 13 files changed, 876 insertions(+), 4 deletions(-) create mode 100644 qemu/hw/passthrough/neo_pci_tree.h create mode 100644 qemu/hw/passthrough/passthrough.c create mode 100644 qemu/hw/passthrough/passthrough.h create mode 100644 tools/pci_barsize.c create mode 100644 tools/pci_mmio.c diff --git a/qemu/Makefile b/qemu/Makefile index 053c88c..3e599f3 100644 --- a/qemu/Makefile +++ b/qemu/Makefile @@ -37,7 +37,7 @@ qemu-img$(EXESUF): qemu-img.c cutils.c block.c block-raw.c block-cow.c block-qco dyngen$(EXESUF): dyngen.c $(HOST_CC) $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -o $@ $^ -clean: +clean: # avoid old build problems by removing potentially incorrect old files rm -f config.mak config.h op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h rm -f *.o *.a $(TOOLS) dyngen$(EXESUF) TAGS cscope.* *.pod *~ */*~ @@ -88,8 +88,8 @@ endif test speed test2: all $(MAKE) -C tests $@ -TAGS: - etags *.[ch] tests/*.[ch] +TAGS: + etags *.[ch] tests/*.[ch] hw/passthrough/*.[ch] cscope: rm -f ./cscope.* diff --git a/qemu/Makefile.target b/qemu/Makefile.target index 65f449e..9a96011 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -24,7 +24,7 @@ ifeq ($(TARGET_ARCH), sparc64) TARGET_BASE_ARCH:=sparc endif TARGET_PATH=$(SRC_PATH)/target-$(TARGET_BASE_ARCH) 
-VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio +VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/hw/passthrough:$(SRC_PATH)/audio CPPFLAGS=-I. -I.. -I$(TARGET_PATH) -I$(SRC_PATH) ifdef CONFIG_DARWIN_USER VPATH+=:$(SRC_PATH)/darwin-user @@ -454,6 +454,8 @@ VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o $(AUDIODRV) VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o VL_OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o VL_OBJS+= usb-uhci.o smbus_eeprom.o vmmouse.o vmport.o vmware_vga.o +# passthrough support +VL_OBJS+= passthrough.o CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE endif ifeq ($(TARGET_BASE_ARCH), ppc) diff --git a/qemu/exec.c b/qemu/exec.c index 3e588d5..7a21ca5 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -2484,6 +2484,7 @@ int cpu_register_io_memory(int io_index, if (io_mem_nb >= IO_MEM_NB_ENTRIES) return -1; io_index = io_mem_nb++; + fprintf(stderr, "iomem index %d out of %d\n", io_index, IO_MEM_NB_ENTRIES); } else { if (io_index >= IO_MEM_NB_ENTRIES) return -1; diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c index 60d31fa..5b1bdf4 100644 --- a/qemu/hw/apic.c +++ b/qemu/hw/apic.c @@ -349,6 +349,7 @@ static void apic_eoi(APICState *s) /* XXX: send the EOI packet to the APIC bus to allow the I/O APIC to set the remote IRR bit for level triggered interrupts. 
*/ apic_update_irq(s); + pt_ack_mirq(isrv); } static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, @@ -1122,6 +1123,7 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va } else { s->ioredtbl[index] &= ~0xffffffffULL; s->ioredtbl[index] |= val; + pt_set_vector(index, (val << 24) >> 24); } ioapic_service(s); } diff --git a/qemu/hw/passthrough/neo_pci_tree.h b/qemu/hw/passthrough/neo_pci_tree.h new file mode 100644 index 0000000..79adef9 --- /dev/null +++ b/qemu/hw/passthrough/neo_pci_tree.h @@ -0,0 +1,44 @@ +/************************************************************************************************* + + Some data structures to save the result of the PCI probing. + + Copyright (c) 2007, Neocleus: Guy Zana, Alex Novik + +**************************************************************************************************/ + +#ifndef __XC_NEO_PCI_TREE_H__ +#define __XC_NEO_PCI_TREE_H__ + +#include <linux/types.h> + +typedef __u8 u8; +typedef __u16 u16; +typedef __u32 u32; +typedef __u64 u64; + + +/************************************ Data Types / Structures ************************************/ + +typedef u32 pciaddr_t; + +#define MAX_IO_REGIONS (6) + +typedef struct pci_region_s { + int type; /* Memory or port I/O */ + int valid; + pciaddr_t base_addr; + pciaddr_t size; /* size of the region */ + int resource_fd; +} pci_region_t; + +typedef struct neo_pci_dev_s { + u8 bus, dev, func; /* Bus inside domain, device and function */ + int irq; /* IRQ number */ + u16 region_number; /* number of active regions */ + + /* Port I/O or MMIO Regions */ + pci_region_t regions[MAX_IO_REGIONS]; + int config_fd; +} neo_pci_dev_t; + +#endif /* __XC_NEO_PCI_TREE_H__ */ diff --git a/qemu/hw/passthrough/passthrough.c b/qemu/hw/passthrough/passthrough.c new file mode 100644 index 0000000..42540a7 --- /dev/null +++ b/qemu/hw/passthrough/passthrough.c @@ -0,0 +1,604 @@ 
+/******************************************************************************
+
+  PCI config handling, MMIO & PIO access through dom0 is done for
+  debugging needs.
+
+  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
+  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
+
+******************************************************************************/
+#include <stdio.h>
+#include <pthread.h>
+#include <sys/io.h>
+#include <sys/ioctl.h>
+
+#include "neo_pci_tree.h"
+
+typedef u64 resource_size_t;
+#define __deprecated
+
+#include <linux/ioport.h>
+#include "vl.h"
+#include "passthrough.h"
+
+#ifdef USE_KVM
+#include "qemu-kvm.h"
+#include <linux/kvm_para.h>
+extern kvm_context_t kvm_context;
+#endif
+extern FILE *logfile;
+
+/* Byte/word/long MMIO handlers handed to cpu_register_io_memory(). */
+CPUReadMemoryFunc *pt_mmio_read_cb[3] = {
+    pt_mmio_readb,
+    pt_mmio_readw,
+    pt_mmio_readl
+};
+
+CPUWriteMemoryFunc *pt_mmio_write_cb[3] = {
+    pt_mmio_writeb,
+    pt_mmio_writew,
+    pt_mmio_writel
+};
+
+//#define PT_DEBUG
+
+#ifdef PT_DEBUG
+#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __FUNCTION__ , ## args)
+#else
+#define DEBUG(fmt, args...)
+#endif
+
+/*
+ * Generate pt_mmio_write{b,w,l}: translate the guest physical address into
+ * the host mapping of the region (opaque is its pt_region_t) and store the
+ * value there. The store goes through a volatile pointer so the compiler
+ * emits exactly one access of the requested width.
+ */
+#define pt_mmio_write(suffix, type) \
+void pt_mmio_write##suffix(void *opaque, target_phys_addr_t e_phys, \
+                           uint32_t value) \
+{ \
+    pt_region_t *r_access = (pt_region_t *)opaque; \
+    void *r_virt = (u8 *)r_access->r_virtbase + \
+                   (e_phys - r_access->e_physbase); \
+    if (r_access->debug & PT_DEBUG_MMIO) { \
+        fprintf(logfile, "pt_mmio_write" #suffix \
+                ": e_physbase=%p e_phys=%p r_virt=%p value=%08x\n", \
+                (void *)r_access->e_physbase, (void *)e_phys, \
+                r_virt, value); \
+    } \
+    *(volatile type *)r_virt = (type)value; \
+}
+
+pt_mmio_write(b, u8)
+pt_mmio_write(w, u16)
+pt_mmio_write(l, u32)
+
+/* Generate pt_mmio_read{b,w,l}: the load counterpart of pt_mmio_write. */
+#define pt_mmio_read(suffix, type) \
+uint32_t pt_mmio_read##suffix(void *opaque, target_phys_addr_t e_phys) \
+{ \
+    pt_region_t *r_access = (pt_region_t *)opaque; \
+    void *r_virt = (u8 *)r_access->r_virtbase + \
+                   (e_phys - r_access->e_physbase); \
+    uint32_t value = (u32) (*(volatile type *) r_virt); \
+    if (r_access->debug & PT_DEBUG_MMIO) { \
+        fprintf(logfile, \
+                "pt_mmio_read" #suffix ": e_physbase=%p " \
+                "e_phys=%p r_virt=%p value=%08x\n", \
+                (void *)r_access->e_physbase, \
+                (void *)e_phys, r_virt, value); \
+    } \
+    return value; \
+}
+
+pt_mmio_read(b, u8)
+pt_mmio_read(w, u16)
+pt_mmio_read(l, u32)
+
+/*
+ * Generate pt_ioport_write{b,w,l}: for port I/O regions r_virtbase holds the
+ * host port base, so the host port is that base plus the offset of addr
+ * within the guest's mapping.
+ */
+#define pt_ioport_write(suffix) \
+void pt_ioport_write##suffix(void *opaque, uint32_t addr, uint32_t value) \
+{ \
+    pt_region_t *r_access = (pt_region_t *)opaque; \
+    uint32_t r_pio = (unsigned long)r_access->r_virtbase \
+                     + (addr - r_access->e_physbase); \
+    if (r_access->debug & PT_DEBUG_PIO) { \
+        fprintf(logfile, "pt_ioport_write" #suffix \
+                ": r_pio=%08x e_physbase=%08x" \
+                " r_virtbase=%08lx value=%08x\n", \
+                r_pio, (int)r_access->e_physbase, \
+                (unsigned long)r_access->r_virtbase, value); \
+    } \
+    out##suffix(value, r_pio); \
+}
+
+pt_ioport_write(b)
+pt_ioport_write(w)
+pt_ioport_write(l)
+
+/* Generate pt_ioport_read{b,w,l}: the input counterpart of pt_ioport_write. */
+#define pt_ioport_read(suffix) \
+uint32_t pt_ioport_read##suffix(void *opaque, uint32_t addr) \
+{ \
+    pt_region_t *r_access = (pt_region_t *)opaque; \
+    uint32_t r_pio = (addr - r_access->e_physbase) \
+                     + (unsigned long)r_access->r_virtbase; \
+    uint32_t value = in##suffix(r_pio); \
+    if (r_access->debug & PT_DEBUG_PIO) { \
+        fprintf(logfile, "pt_ioport_read" #suffix \
+                ": r_pio=%08x e_physbase=%08x r_virtbase=%08lx "\
+                "value=%08x\n", \
+                r_pio, (int)r_access->e_physbase, \
+                (unsigned long)r_access->r_virtbase, value); \
+    } \
+    return (value); \
+}
+
+pt_ioport_read(b)
+pt_ioport_read(w)
+pt_ioport_read(l)
+
+/*
+ * BAR-mapping callback for memory regions: record the guest physical base
+ * and route that range to the region's MMIO handlers.
+ */
+static void pt_iomem_map(PCIDevice * d, int region_num,
+                         uint32_t e_phys, uint32_t e_size, int type)
+{
+    pt_dev_t *r_dev = (pt_dev_t *) d;
+
+    r_dev->v_addrs[region_num].e_physbase = e_phys;
+
+    DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
+          e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size,
+          region_num);
+
+    cpu_register_physical_memory(e_phys,
+                                 r_dev->dev.io_regions[region_num].size,
+                                 r_dev->v_addrs[region_num].memory_index);
+}
+
+
+/*
+ * BAR-mapping callback for port I/O regions: record the guest port base and
+ * register the 1-, 2- and 4-byte port handlers for the range.
+ */
+static void pt_ioport_map(PCIDevice * pci_dev, int region_num,
+                          uint32_t addr, uint32_t size, int type)
+{
+    pt_dev_t *r_dev = (pt_dev_t *) pci_dev;
+    int i;
+    uint32_t ((*rf[])(void *, uint32_t)) = { pt_ioport_readb,
+                                             pt_ioport_readw,
+                                             pt_ioport_readl
+    };
+    void ((*wf[])(void *, uint32_t, uint32_t)) = { pt_ioport_writeb,
+                                                   pt_ioport_writew,
+                                                   pt_ioport_writel
+    };
+
+    r_dev->v_addrs[region_num].e_physbase = addr;
+    /* the space after len=%d keeps the two fields from running together */
+    fprintf(logfile, "pt_ioport_map: address=0x%x type=0x%x len=%d "
+            "region_num=%d \n", addr, type, size, region_num);
+
+    for (i = 0; i < 3; i++) {
+        register_ioport_write(addr, size, 1<<i, wf[i],
+                              (void *) (r_dev->v_addrs + region_num));
+        register_ioport_read(addr, size, 1<<i, rf[i],
+                             (void *) (r_dev->v_addrs + region_num));
+    }
+}
+
+/*
+ * Config-space write handler. BARs, the capability pointer and the
+ * interrupt line/pin are emulated; the command register is written to both
+ * the emulated copy and the real device; everything else goes to the real
+ * device only.
+ */
+static void pt_pci_write_config(PCIDevice * d, uint32_t address, uint32_t val,
+                                int len)
+{
+    int fd;
+
+    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), (uint16_t) address,
+          val, len);
+
+    if (address == 0x4)
+        pci_default_write_config(d, address, val, len);
+
+    if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
+        address == 0x3c || address == 0x3d) {
+        /* used for update-mappings (BAR emulation) */
+        pci_default_write_config(d, address, val, len);
+        return;
+    }
+
+    DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
+          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), (uint16_t) address,
+          val, len);
+    fd = ((pt_dev_t *)d)->real_device.config_fd;
+    if (lseek(fd, address, SEEK_SET) == (off_t)-1 ||
+        write(fd, &val, len) < 0)
+        fprintf(logfile, "pt_pci_write_config: write at 0x%x failed: %m\n",
+                address);
+}
+
+/*
+ * Config-space read handler; mirrors pt_pci_write_config. Values read from
+ * the real device have the capability-list status bit cleared.
+ */
+static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
+{
+    uint32_t val = 0;
+    int fd;
+
+    if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
+        address == 0x3c || address == 0x3d) {
+        val = pci_default_read_config(d, address, len);
+        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val,
+              len);
+        return (val);
+    }
+
+    /* vga specific, remove later */
+    if (address == 0xFC)
+        goto do_log;
+
+    fd = ((pt_dev_t *)d)->real_device.config_fd;
+    if (lseek(fd, address, SEEK_SET) == (off_t)-1 ||
+        read(fd, &val, len) < 0)
+        fprintf(logfile, "pt_pci_read_config: read at 0x%x failed: %m\n",
+                address);
+
+ do_log:
+    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+          (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+
+    /* kill the special capabilities */
+    if (address == 4 && len == 4)
+        val &= ~0x100000;
+    else if (address == 6)
+        val &= ~0x10;
+
+    return (val);
+}
+
+
+/*
+ * Register every valid region of the real device as a BAR of the emulated
+ * device. Memory regions are mmap()ed from their resource_fd and given MMIO
+ * handlers; port regions are forwarded to the host ports directly.
+ * Returns 0 on success, -1 if a memory region cannot be mapped.
+ */
+int pt_register_regions(pci_region_t * io_regions,
+                        unsigned long regions_num, pt_dev_t * pci_dev)
+{
+    uint32_t i;
+    pci_region_t *cur_region = io_regions;
+
+    for (i = 0; i < regions_num; i++, cur_region++) {
+        if (!cur_region->valid)
+            continue;
+#ifdef PT_DEBUG
+        pci_dev->v_addrs[i].debug |= PT_DEBUG_MMIO | PT_DEBUG_PIO;
+#endif
+        pci_dev->v_addrs[i].num = i;
+
+        /* handle memory io regions */
+        if (cur_region->type & IORESOURCE_MEM) {
+            int t = cur_region->type & IORESOURCE_PREFETCH
+                    ? PCI_ADDRESS_SPACE_MEM_PREFETCH
+                    : PCI_ADDRESS_SPACE_MEM;
+
+            /* map physical memory */
+            pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+            pci_dev->v_addrs[i].r_virtbase =
+                mmap(NULL, (cur_region->size + 0xFFF) & 0xFFFFF000,
+                     PROT_WRITE | PROT_READ, MAP_SHARED,
+                     cur_region->resource_fd, (off_t) 0);
+
+            if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase) {
+                fprintf(logfile, "NEO: Error: Couldn't mmap 0x%x!\n",
+                        (uint32_t) (cur_region->base_addr));
+                return (-1);
+            }
+
+            /* add offset */
+            pci_dev->v_addrs[i].r_virtbase += (cur_region->base_addr & 0xFFF);
+
+            pci_register_io_region((PCIDevice *) pci_dev, i,
+                                   cur_region->size, t, pt_iomem_map);
+
+            pci_dev->v_addrs[i].memory_index =
+                cpu_register_io_memory(0, pt_mmio_read_cb, pt_mmio_write_cb,
+                                       (void *) &(pci_dev->v_addrs[i]));
+
+            continue;
+        }
+        /* handle port io regions */
+
+        pci_register_io_region((PCIDevice *) pci_dev, i, cur_region->size,
+                               PCI_ADDRESS_SPACE_IO, pt_ioport_map);
+
+        pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+        pci_dev->v_addrs[i].r_virtbase = (void *)(long)cur_region->base_addr;
+        pci_dev->v_addrs[i].memory_index = 0;        // not relevant for port io
+    }
+
+    /* success */
+    return (0);
+
+}
+
+/*
+ * Probe the host device r_bus:r_dev.r_func through sysfs: open and snapshot
+ * its config space, then parse the "resource" file into
+ * pci_dev->real_device.regions[]. Returns 0 on success, 1 on failure.
+ *
+ * Only the first MAX_IO_REGIONS lines of the resource file are read;
+ * regions[] has exactly that many slots and the file continues past them.
+ */
+int
+pt_get_real_device(pt_dev_t *pci_dev, uint8_t r_bus, uint8_t r_dev,
+                   uint8_t r_func)
+{
+    char dir[128], name[128];
+    int fd, r = 0;
+    FILE *f;
+    unsigned long long start, end, size, flags;
+    pci_region_t *rp;
+    neo_pci_dev_t *dev = &pci_dev->real_device;
+
+    dev->region_number = 0;
+
+    snprintf(dir, sizeof dir, "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
+             r_bus, r_dev, r_func);
+    snprintf(name, sizeof name, "%sconfig", dir);
+    if ((fd = open(name, O_RDWR)) == -1) {
+        fprintf(logfile, "%s: %m\n", name);
+        return 1;
+    }
+    dev->config_fd = fd;
+    if (read(fd, pci_dev->dev.config, sizeof pci_dev->dev.config) < 0) {
+        fprintf(logfile, "%s: %m\n", name);
+        goto fail;
+    }
+
+    snprintf(name, sizeof name, "%sresource", dir);
+    if ((f = fopen(name, "r")) == NULL) {
+        fprintf(logfile, "%s: %m\n", name);
+        goto fail;
+    }
+
+    /* %llx matches the unsigned variables and accepts the 0x prefix */
+    for (r = 0; r < MAX_IO_REGIONS &&
+         fscanf(f, "%llx %llx %llx\n", &start, &end, &flags) == 3; r++) {
+        rp = dev->regions + r;
+        rp->valid = 0;
+        rp->resource_fd = -1;
+        size = end - start + 1;
+        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+        if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
+            continue;
+        if (flags & IORESOURCE_MEM) {
+            flags &= ~IORESOURCE_IO;
+            snprintf(name, sizeof name, "%sresource%d", dir, r);
+            if ((fd = open(name, O_RDWR)) == -1)
+                continue;        // region cannot be mapped, leave it invalid
+            rp->resource_fd = fd;
+        } else
+            flags &= ~IORESOURCE_PREFETCH;
+
+        rp->type = flags;
+        rp->valid = 1;
+        rp->base_addr = start;
+        rp->size = size;
+        fprintf(logfile, "region %d size %u start 0x%llx type %d resource_fd %d\n",
+                r, rp->size, start, rp->type, rp->resource_fd);
+    }
+    fclose(f);
+
+    dev->region_number = r;
+    return 0;
+
+ fail:
+    close(dev->config_fd);
+    dev->config_fd = -1;
+    return 1;
+}
+
+/* From include/linux/pci.h in the kernel sources */
+#define PCI_DEVFN(slot,func)  ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+
+/*
+ * Create the emulated PCI device e_dev_name on e_bus backed by the host
+ * device r_bus:r_dev.r_func, register its BARs, bind machine_irq (if
+ * non-zero) and, with KVM, tell the kernel about the assignment.
+ * Returns the new device, or NULL on failure.
+ */
+pt_dev_t *register_real_device(PCIBus * e_bus, const char *e_dev_name,
+                               int e_devfn, uint8_t r_bus, uint8_t r_dev,
+                               uint8_t r_func, uint32_t machine_irq)
+{
+    int rc;
+    pt_dev_t *pci_dev;
+    uint8_t e_device, e_intx;
+#ifdef USE_KVM
+    /* the type only exists in KVM builds */
+    struct kvm_pv_passthrough_dev pv_pci_dev;
+#endif
+
+    fprintf(logfile, "register_real_device: Registering real physical device %s (devfn=0x%x)\n", e_dev_name, e_devfn);
+
+    pci_dev = (pt_dev_t *) pci_register_device(e_bus, e_dev_name,
+                                               sizeof(pt_dev_t), e_devfn,
+                                               pt_pci_read_config,
+                                               pt_pci_write_config);
+
+    if (NULL == pci_dev) {
+        fprintf(logfile, "register_real_device: Error: Couldn't register real device %s\n", e_dev_name);
+        return (NULL);
+    }
+    if (pt_get_real_device(pci_dev, r_bus, r_dev, r_func)) {
+        fprintf(logfile, "register_real_device: Error: Couldn't get real device (%s)!\n", e_dev_name);
+        return NULL;
+    }
+
+    /* handle real device's MMIO/PIO BARs */
+    if (pt_register_regions(pci_dev->real_device.regions,
+                            pci_dev->real_device.region_number, pci_dev))
+        return (NULL);
+
+    /* handle interrupt routing */
+    e_device = (pci_dev->dev.devfn >> 3) & 0x1f;
+    /* NOTE(review): a device reporting interrupt pin 0 yields intpin 0xff,
+     * which pt_bh later uses as an index - confirm such devices are never
+     * given an IRQ. */
+    e_intx = pci_dev->dev.config[0x3d] - 1;
+    pci_dev->intpin = e_intx;
+    pci_dev->run = 0;
+    pci_dev->mirq = machine_irq;
+
+    /* bind machine_irq to device */
+    if (machine_irq) {
+        fprintf(logfile, "Binding mirq %u to device=0x%x intpin=0x%x\n",
+                machine_irq, e_device, pci_dev->intpin);
+        rc = pt_bind_mirq(r_bus, r_dev, r_func);
+        if (rc) {
+            fprintf(logfile, "pt_bind %d failed rc=%d\n", pci_dev->mirq, rc);
+            return NULL;
+        }
+        /* sirq is a small fixed buffer; never write past it */
+        snprintf(pci_dev->sirq, sizeof pci_dev->sirq, "%d", pci_dev->mirq);
+    }
+
+#ifdef USE_KVM
+    /* Let the host kernel know we'll be dealing with this device hereafter */
+    pv_pci_dev.guest.busnr = pci_bus_num(e_bus);
+    /* guest side: the emulated device's own devfn, not the host function */
+    pv_pci_dev.guest.devfn = pci_dev->dev.devfn;
+    pv_pci_dev.mach.busnr = r_bus;
+    pv_pci_dev.mach.devfn = PCI_DEVFN(r_dev, r_func);
+
+    rc = ioctl(kvm_get_vm_fd(kvm_context), KVM_ASSIGN_PV_PCI_DEV,
+               &pv_pci_dev);
+    if (rc == -1) {
+        fprintf(stderr, "Could not notify kernel about passthrough "
+                "device\n");
+        perror("pt-ioctl:");
+        return NULL;
+    }
+#endif
+    fprintf(logfile, "register_real_device: Real physical device (%02x:%02x.%x) \"%s\" registered successfully!\n", r_bus, r_dev, r_func, e_dev_name);
+
+    return (pci_dev);
+}
+
+#define MAX_PTDEVS 4
+/* Devices requested with -passthrough; ptdev is filled in by pt_init(). */
+struct {
+    char name[128];
+    int bus;
+    int dev;
+    int func;
+    int irq;
+    pt_dev_t *ptdev;
+} ptdevs[MAX_PTDEVS];
+
+int nptdevs;
+
+static QEMUBH *ptbh;
+static int irqfd;
+/* One NULL-terminated device list per vector; sized for every 8-bit vector
+ * so that any value coming from the APIC code is a valid index. */
+static pt_dev_t **apicv[0x100];
+#define IRQHOOK_DEV "/dev/irqhook"
+static pthread_t irqthread;
+
+/*
+ * IRQ thread: block on the irqhook device, parse each decimal IRQ number it
+ * delivers, flag the matching devices and schedule pt_bh to raise the guest
+ * interrupt. Has the signature pthread_create() expects.
+ * NOTE(review): dev->run is shared with pt_bh without synchronization.
+ */
+void *pt_irq(void *arg)
+{
+    char buf[20];
+    ssize_t len;
+    int irq;
+    int i;
+    pt_dev_t *dev;
+
+    /* 0 means "never opened", negative means open() failed */
+    if (irqfd <= 0) {
+        fprintf(stderr, "pt_irq: irqfd %d, exiting\n", irqfd);
+        exit(-1);
+    }
+
+    for (;;) {
+        /* leave room for the terminator atoi() needs */
+        len = read(irqfd, buf, sizeof buf - 1);
+        if (len == -1) {
+            if (errno == EINTR)
+                continue;
+            perror("irq read: ");
+            continue;        /* nothing was read; don't parse stale data */
+        }
+        buf[len] = 0;
+
+        irq = atoi(buf);
+        DEBUG("read irq %d\n", irq);
+        if (!irq)
+            continue;
+
+        for (i = 0; i < nptdevs; i++) {
+            if ((dev = ptdevs[i].ptdev) && dev->mirq == irq)
+                dev->run = 1;
+        }
+        qemu_bh_schedule(ptbh);
+    }
+}
+
+/*
+ * Bottom half scheduled by the IRQ thread: raise the guest interrupt line
+ * of every device flagged as pending and kick the CPU out of guest code.
+ */
+static void pt_bh(void *p)
+{
+    int i;
+    pt_dev_t *dev;
+
+    for (i = 0; i < nptdevs; i++) {
+        if ((dev = ptdevs[i].ptdev) && dev->run) {
+            qemu_set_irq(dev->dev.irq[dev->intpin], 1);
+            dev->run = 0;
+            if (cpu_single_env)
+                cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+        }
+    }
+}
+
+/*
+ * Set up passthrough for every device given with -passthrough: open the
+ * irqhook device, start the IRQ thread and register the devices on bus.
+ * Returns 0 on success (or when nothing was requested), -1 otherwise.
+ */
+int pt_init(PCIBus * bus)
+{
+    pt_dev_t *dev;
+    int i, ret = 0;
+
+    /* no -passthrough option given: don't touch iopl or the irqhook */
+    if (nptdevs == 0)
+        return 0;
+
+    /* without port access PIO regions cannot work; MMIO ones still can */
+    if (iopl(3) == -1)
+        fprintf(logfile, "Couldn't raise I/O privilege level: %m\n");
+
+    if (!(ptbh = qemu_bh_new(pt_bh, 0))) {
+        fprintf(logfile, "Couldn't register PT callback\n");
+        return -1;
+    }
+
+    /* open() reports failure with -1, not 0 */
+    if ((irqfd = open(IRQHOOK_DEV, O_RDWR)) < 0) {
+        fprintf(logfile, "Couldn't open PT irqhook dev\n");
+        irqfd = 0;
+        return -1;
+    }
+
+    if (pthread_create(&irqthread, 0, pt_irq, 0)) {
+        fprintf(logfile, "Couldn't create IRQ thread\n");
+        return -1;
+    }
+
+    for (i = 0; i < nptdevs; i++) {
+        dev = register_real_device(bus, ptdevs[i].name, -1, ptdevs[i].bus,
+                                   ptdevs[i].dev, ptdevs[i].func,
+                                   ptdevs[i].irq);
+
+        if (dev == NULL) {
+            /* name the device that actually failed */
+            fprintf(logfile, "NEO: Error: Couldn't register %s\n",
+                    ptdevs[i].name);
+            ret = -1;
+        }
+        ptdevs[i].ptdev = dev;
+    }
+
+    /* success */
+    return (ret);
+}
+
+/*
+ * Parse one -passthrough argument of the form name/bus:dev.func-intr
+ * (bus, dev and func in hex) and append it to ptdevs[]. Malformed,
+ * over-long or out-of-range arguments are reported and ignored.
+ */
+void
+add_passthrough_device(char *arg)
+{
+    /* name/bus:dev.func-intr */
+    char *cp, *cp1;
+    unsigned long bus, dev, func;
+
+    if (nptdevs >= MAX_PTDEVS) {
+        fprintf(logfile, "Too many passthrough devices (max %d)\n", MAX_PTDEVS);
+        return;
+    }
+    /* the argument comes from the command line: never overrun name[] */
+    if (strlen(arg) >= sizeof ptdevs[nptdevs].name)
+        goto bad;
+    strcpy(ptdevs[nptdevs].name, arg);
+    cp = strchr(ptdevs[nptdevs].name, '/');
+    if (cp == NULL)
+        goto bad;
+    *cp++ = 0;
+
+    /* cp1 == cp means the field held no digits at all */
+    bus = strtoul(cp, &cp1, 16);
+    if (cp1 == cp || *cp1 != ':' || bus > 0xff)
+        goto bad;
+    cp = cp1 + 1;
+
+    dev = strtoul(cp, &cp1, 16);
+    if (cp1 == cp || *cp1 != '.' || dev > 0x1f)
+        goto bad;
+    cp = cp1 + 1;
+
+    func = strtoul(cp, &cp1, 16);
+    if (cp1 == cp || *cp1 != '-' || func > 0x7)
+        goto bad;
+    cp = cp1 + 1;
+
+    ptdevs[nptdevs].irq = strtoul(cp, &cp1, 0);
+    if (cp1 == cp || *cp1 != 0)
+        goto bad;
+
+    ptdevs[nptdevs].bus = bus;
+    ptdevs[nptdevs].dev = dev;
+    ptdevs[nptdevs].func = func;
+    nptdevs++;
+    return;
+ bad:
+    fprintf(logfile, "passthrough arg (%s) not in the form of name/bus:dev.func-intr\n", arg);
+}
+
+/*
+ * Called on guest EOI for vector: acknowledge the machine IRQ of every
+ * device bound to that vector through the irqhook and lower its guest
+ * interrupt line.
+ */
+void pt_ack_mirq(int vector)
+{
+    pt_dev_t **p;
+
+    /* the caller passes raw vector numbers; stay inside apicv[] */
+    if (vector < 0 || vector >= (int)(sizeof apicv / sizeof apicv[0]))
+        return;
+    p = apicv[vector];
+    if (!p)
+        return;
+
+    for (; *p; p++) {
+        if (write(irqfd, (*p)->sirq, strlen((*p)->sirq)) < 0)
+            perror("pt_ack_mirq:");
+        qemu_set_irq((*p)->dev.irq[(*p)->intpin], 0);
+    }
+}
+
+/*
+ * Ask the irqhook to forward interrupts of host device bus:dev.fn by
+ * writing "+bus:dev.fn" to it. Exits the process if the write fails;
+ * returns 0 otherwise.
+ */
+int pt_bind_mirq(int bus, int dev, int fn)
+{
+    char s[64];
+    size_t len;
+
+    snprintf(s, sizeof s, "+%d:%d.%d", bus, dev, fn);
+    len = strlen(s);
+    if (write(irqfd, s, len) != (ssize_t)len) {
+        perror("pt_bind_mirq:");
+        exit(-1);
+    }
+    return 0;
+}
+
+/*
+ * Called when the guest programs IOAPIC pin irq with vector: append every
+ * not-yet-bound passthrough device routed to that pin to the vector's
+ * NULL-terminated device list.
+ */
+void pt_set_vector(int irq, int vector)
+{
+    int i;
+    size_t n;
+    int pin = piix3_get_pin(irq);
+    pt_dev_t *pt, **list;
+
+    DEBUG("irq %d vector %d\n", irq, vector);
+    if (vector < 0 || vector >= (int)(sizeof apicv / sizeof apicv[0]))
+        return;
+    for (i = 0; i < nptdevs; i++) {
+        pt = ptdevs[i].ptdev;
+        if (!pt || pt->bound)
+            continue;
+        if (pci_map_irq(&pt->dev, pt->intpin) == pin) {
+            /* count the current entries; the list may not exist yet */
+            n = 0;
+            if (apicv[vector])
+                while (apicv[vector][n])
+                    n++;
+            /* room for the old entries, the new one and the terminator */
+            list = realloc(apicv[vector], (n + 2) * sizeof *list);
+            if (!list) {
+                fprintf(logfile, "pt_set_vector: out of memory\n");
+                return;
+            }
+            list[n] = pt;
+            list[n + 1] = 0;
+            apicv[vector] = list;
+            pt->bound = 1;
+        }
+    }
+    DEBUG("done\n");
+}
diff --git a/qemu/hw/passthrough/passthrough.h b/qemu/hw/passthrough/passthrough.h
new file mode 100644
index 0000000..3d8542d
--- /dev/null
+++ b/qemu/hw/passthrough/passthrough.h
@@ -0,0 +1,64 @@
+/*************************************************************************************************
+
+  PCI config handling, MMIO & PIO access through dom0 is done for debugging needs.
+
+  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
+  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
+
+**************************************************************************************************/
+
+#ifndef __PASSTHROUGH_H__
+#define __PASSTHROUGH_H__
+
+#include <sys/mman.h>
+#include "vl.h"
+#include "neo_pci_tree.h"        /* neo_pci_dev_t, embedded in pt_dev_t */
+
+#define PT_DEBUG_PIO    (0x01)
+#define PT_DEBUG_MMIO   (0x02)
+
+/* One BAR of a passed-through device, as seen from both sides. */
+typedef struct pt_region_s {
+    target_phys_addr_t e_physbase;  /* base address in the guest */
+    uint32_t memory_index;          /* from cpu_register_io_memory(); unused for PIO */
+    void *r_virtbase;               /* mmapped access address */
+    int num;                        /* our index within v_addrs[] */
+    uint32_t debug;                 /* PT_DEBUG_* flags */
+} pt_region_t;
+
+/* An emulated PCI device backed by a real host device. */
+typedef struct pt_dev_s {
+    PCIDevice dev;                  /* must stay first: cast to/from PCIDevice */
+    int intpin;                     /* zero-based interrupt pin */
+    uint8_t debug_flags;
+    pt_region_t v_addrs[PCI_NUM_REGIONS];
+    neo_pci_dev_t real_device;
+    int run;                        /* set by the IRQ thread, cleared by the bottom half */
+    int mirq;                       /* host (machine) IRQ number */
+    char sirq[4];                   /* mirq as a decimal string, written to the irqhook */
+    int bound;                      /* already attached to an APIC vector */
+} pt_dev_t;
+
+
+/* MMIO access functions */
+uint32_t pt_mmio_readb(void *opaque, target_phys_addr_t e_phys);
+uint32_t pt_mmio_readw(void *opaque, target_phys_addr_t e_phys);
+uint32_t pt_mmio_readl(void *opaque, target_phys_addr_t e_phys);
+void pt_mmio_writeb(void *opaque, target_phys_addr_t e_phys, uint32_t value);
+void pt_mmio_writew(void *opaque, target_phys_addr_t e_phys, uint32_t value);
+void pt_mmio_writel(void *opaque, target_phys_addr_t e_phys, uint32_t value);
+
+/* PIO access functions */
+uint32_t pt_ioport_readb(void *opaque, uint32_t addr);
+uint32_t pt_ioport_readw(void *opaque, uint32_t addr);
+uint32_t pt_ioport_readl(void *opaque, uint32_t addr);
+void pt_ioport_writeb(void *opaque, uint32_t addr, uint32_t value);
+void pt_ioport_writew(void *opaque, uint32_t addr, uint32_t value);
+void pt_ioport_writel(void *opaque, uint32_t addr, uint32_t value);
+
+/* Registration functions */
+int register_pt_pio_region(uint32_t pio_start, uint32_t length,
+                           uint8_t do_logging);
+int register_pt_mmio_region(uint32_t mmio_addr, uint32_t length,
+                            uint8_t do_logging);
+
+/* Setup and interrupt plumbing, called from the command-line parser, the
+ * machine init code and the APIC emulation as well as passthrough.c. */
+int pt_init(PCIBus *bus);
+void add_passthrough_device(char *arg);
+int pt_bind_mirq(int bus, int dev, int fn);
+void pt_ack_mirq(int vector);
+void pt_set_vector(int irq, int vector);
+
+#define logfile stderr
+
+#endif /* __PASSTHROUGH_H__ */
diff --git a/qemu/hw/pc.c 
b/qemu/hw/pc.c index 8aae814..d7892e0 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -888,6 +888,9 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, int boot_device, } } + /* Initialize pass-through */ + pt_init(pci_bus); + rtc_state = rtc_init(0x70, i8259[8]); register_ioport_read(0x92, 1, 1, ioport92_read, NULL); diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 7e8adc4..8be3645 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -457,6 +457,11 @@ static void pci_set_irq(void *opaque, int irq_num, int level) bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); } +int pci_map_irq(PCIDevice *pci_dev, int pin) +{ + return pci_dev->bus->map_irq(pci_dev, pin); +} + /***********************************************************/ /* monitor info on PCI */ diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c index 8c00f0d..a9d87bd 100644 --- a/qemu/hw/piix_pci.c +++ b/qemu/hw/piix_pci.c @@ -225,6 +225,14 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num, int level) } } +/* Return which of the four route registers at config offsets 0x60-0x63 holds pic_irq (0-3), or -1 when none does. */ +int piix3_get_pin(int pic_irq) +{ + int i; + for (i = 0; i < 4; i++) if (piix3_dev->config[0x60+i] == pic_irq) return i; + return -1; +} + static void piix3_reset(PCIDevice *d) { uint8_t *pci_conf = d->config; diff --git a/qemu/vl.c b/qemu/vl.c index 634fb34..21b3d47 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -1182,6 +1182,7 @@ static void host_alarm_handler(int host_signum) SetEvent(data->host_alarm); #endif CPUState *env = cpu_single_env; + if (env) { /* stop the currently executing cpu because a timer occured */ cpu_interrupt(env, CPU_INTERRUPT_EXIT); @@ -7532,6 +7533,7 @@ enum { QEMU_OPTION_vnc, QEMU_OPTION_no_acpi, QEMU_OPTION_no_kvm, + QEMU_OPTION_passthrough, QEMU_OPTION_no_kvm_irqchip, QEMU_OPTION_no_reboot, QEMU_OPTION_show_cursor, @@ -7611,6 +7613,7 @@ const QEMUOption qemu_options[] = { #endif #ifdef USE_KVM { "no-kvm", 0, QEMU_OPTION_no_kvm }, + { "passthrough", HAS_ARG, QEMU_OPTION_passthrough }, { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip }, #endif #if 
defined(TARGET_PPC) || defined(TARGET_SPARC) @@ -8427,6 +8430,9 @@ int main(int argc, char **argv) case QEMU_OPTION_no_kvm: kvm_allowed = 0; break; + case QEMU_OPTION_passthrough: + add_passthrough_device(optarg); + break; case QEMU_OPTION_no_kvm_irqchip: kvm_irqchip = 0; break;
diff --git a/tools/pci_barsize.c b/tools/pci_barsize.c
new file mode 100644
index 0000000..dd230c9
--- /dev/null
+++ b/tools/pci_barsize.c
@@ -0,0 +1,91 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+/* Flag bits in the low end of a BAR: 2 for I/O BARs, 4 for memory BARs. */
+#define BAR_IO_MASK  (~0x3u)
+#define BAR_MEM_MASK (~0xfu)
+
+/* Print msg with the errno text and exit; for failed system calls. */
+int
+panic(char *msg)
+{
+    perror(msg);
+    exit(1);
+}
+
+/* Print msg as-is and exit; for argument errors, where errno means nothing. */
+static void
+usage(const char *msg)
+{
+    fprintf(stderr, "%s\n", msg);
+    exit(1);
+}
+
+/* Read the 32-bit register at offs; exits on error or short read. */
+static unsigned
+cfg_read(int fd, int offs)
+{
+    unsigned v;
+
+    if (lseek(fd, offs, SEEK_SET) < 0)
+        panic("lseek");
+    if (read(fd, &v, sizeof(v)) != sizeof(v))
+        panic("read");
+    return v;
+}
+
+/* Write v to the 32-bit register at offs; exits on error or short write. */
+static void
+cfg_write(int fd, int offs, unsigned v)
+{
+    if (lseek(fd, offs, SEEK_SET) < 0)
+        panic("lseek");
+    if (write(fd, &v, sizeof(v)) != sizeof(v))
+        panic("write");
+}
+
+/*
+ * usage: pci_barsize <config-space file> [bar no]
+ *
+ * Sizes one BAR (default 0) by writing all-ones to it, reading back which
+ * bits stuck, and restoring the original value.
+ */
+int
+main(int argc, char **argv)
+{
+    unsigned l, b, sz;
+    int fd, ismem, bar = 0, offs;
+
+    if (argc < 2)
+        usage("usage: pci_barsize <file> [bar no]");
+
+    if ((fd = open(argv[1], O_RDWR)) < 0)
+        panic("open");
+
+    if (argc > 2)
+        bar = strtoul(argv[2], 0, 0);
+    if (bar < 0 || bar > 5)
+        usage("bar range 0-5");
+
+    offs = 0x10 + bar * 4;
+    l = cfg_read(fd, offs);
+    printf("bar %d (offs 0x%x) == %x\n", bar, offs, l);
+
+    /* bit 0 clear means a memory BAR, set means an I/O BAR */
+    ismem = !(l & 0x01);
+
+    cfg_write(fd, offs, ~0u);
+    b = cfg_read(fd, offs);
+    /* restore the BAR before reporting */
+    cfg_write(fd, offs, l);
+
+    sz = ~(b & (ismem ? BAR_MEM_MASK : BAR_IO_MASK)) + 1;
+    printf("bar %d %s size 0x%x == %uKB (%x)\n",
+           bar, ismem ? "memory" : "IO", sz, sz / 1024, b);
+
+    return 0;
+}
diff --git a/tools/pci_mmio.c b/tools/pci_mmio.c
new file mode 100644
index 0000000..6e91571
--- /dev/null
+++ b/tools/pci_mmio.c
@@ -0,0 +1,102 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+/* Print msg with the errno text and exit; for failed system calls. */
+int
+panic(char *msg)
+{
+    perror(msg);
+    exit(1);
+}
+
+/* Print msg as-is and exit; for argument errors, where errno means nothing. */
+static void
+usage(const char *msg)
+{
+    fprintf(stderr, "%s\n", msg);
+    exit(1);
+}
+
+/*
+ * usage: pci_mmio <resource-file> [offset [count]]
+ *
+ * Dumps count bytes starting at byte offset of a mappable file to stdout,
+ * reading the mapping with 1-, 2- or 4-byte accesses.
+ */
+int
+main(int argc, char **argv)
+{
+    unsigned sz;
+    int fd, cnt, rsz, offs = 0;
+    void *map;
+    struct stat st;
+
+    if (argc < 2)
+        usage("usage: pci_mmio <resouce-file> [offset [count]]");
+
+    if ((fd = open(argv[1], O_RDWR)) < 0)
+        panic("open");
+
+    if (fstat(fd, &st) < 0)
+        panic("fstat");
+    cnt = sz = st.st_size;
+
+    if (argc > 2)
+        offs = strtoul(argv[2], 0, 0);
+    if (argc > 3)
+        cnt = strtoul(argv[3], 0, 0);
+
+    if (cnt < 0 || cnt > sz)
+        usage("bad count");
+    if (offs < 0 || offs > sz)
+        usage("bad offset");
+    if (offs + cnt > sz) {
+        cnt = sz - offs;
+        fprintf(stderr, "count truncated to %d", cnt);
+    }
+    /* a single 4-byte access needs 4-byte alignment too, not just longer reads */
+    if (cnt >= 4 && offs % 4)
+        usage("reads of 4 bytes or more must be 4 bytes aligned");
+    if (cnt == 2 && offs % 2)
+        usage("2 bytes read must be 2 bytes aligned");
+    if (cnt != 1 && cnt != 2 && cnt != 4 && cnt % 4)
+        usage("counts must be 1, 2, 4 or 4*n");
+
+    fprintf(stderr, "reading %s [%d:%d]\n", argv[1], offs, offs + cnt);
+    map = mmap(NULL, sz, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+
+    /* mmap signals failure with MAP_FAILED, never with NULL */
+    if (map == MAP_FAILED)
+        panic("mmap");
+
+    rsz = cnt > 4 ? 4 : cnt;
+    fprintf(stderr, "rsz: %d cnt %d\n", rsz, cnt);
+    while (cnt > 0) {
+        union { char b; short w; int l; } v;
+
+        /* offs is a byte offset: index the mapping, don't add to the value */
+        switch (rsz) {
+        case 1:
+            v.b = *((volatile char *)map + offs);
+            break;
+        case 2:
+            v.w = *((volatile short *)map + offs / sizeof(short));
+            break;
+        case 4:
+            v.l = *((volatile int *)map + offs / 4);
+            break;
+        }
+        if (write(1, &v, rsz) != rsz)
+            panic("write");
+
+        offs += rsz;
+        cnt -= rsz;
+    }
+    fprintf(stderr, "done\n");
+    return 0;
+}
-- 
1.5.3