On Wed, Mar 21, 2012 at 20:30, Michael S. Tsirkin <m...@redhat.com> wrote: > On Wed, Mar 21, 2012 at 06:29:00PM +0000, Anthony PERARD wrote: >> Signed-off-by: Anthony PERARD <anthony.per...@citrix.com> >> Acked-by: Stefano Stabellini <stefano.stabell...@eu.citrix.com> > > So this interface is really LinuxSysfsPCIDevice. > For example the assumption that you can just open > device by pci address is broken with vfio. > Domain number is also not something anyone > besides linux knows about. > > If I were you I would just call it xen- .... > and if it comes in handy it can be later renamed.
Ok, I will rename that XenHostPCIDevice. >> --- >> Makefile.target | 3 + >> hw/host-pci-device.c | 278 >> ++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/host-pci-device.h | 75 ++++++++++++++ >> 3 files changed, 356 insertions(+), 0 deletions(-) >> create mode 100644 hw/host-pci-device.c >> create mode 100644 hw/host-pci-device.h >> >> diff --git a/Makefile.target b/Makefile.target >> index 63cf769..0ccfd5b 100644 >> --- a/Makefile.target >> +++ b/Makefile.target >> @@ -232,6 +232,9 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o >> >> obj-i386-$(CONFIG_XEN) += xen_platform.o >> >> +# Xen PCI Passthrough >> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o >> + >> # Inter-VM PCI shared memory >> CONFIG_IVSHMEM = >> ifeq ($(CONFIG_KVM), y) >> diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c >> new file mode 100644 >> index 0000000..3dacb30 >> --- /dev/null >> +++ b/hw/host-pci-device.c >> @@ -0,0 +1,278 @@ >> +/* >> + * Copyright (C) 2011 Citrix Ltd. >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2. See >> + * the COPYING file in the top-level directory. >> + * >> + */ >> + >> +#include "qemu-common.h" >> +#include "host-pci-device.h" >> + >> +#define PCI_MAX_EXT_CAP \ >> + ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + >> 4)) > > namespace pollution. > name all things HOST_PCI_.... > > in this case, open-coding will make things clearer. > > >> + >> +enum error_code { > > seems unused. So why name the type? > >> + ERROR_SYNTAX = 1, > > We return -1 on error, just do that and you won't need ERROR_SYNTAX. Ok, I'll remove this. >> +}; >> + >> +static int path_to(const HostPCIDevice *d, >> + const char *name, char *buf, ssize_t size) >> +{ >> + return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s", >> + d->domain, d->bus, d->dev, d->func, name); >> +} > > users ignore return value. Also, want to check no overflow > and assert? 
I will check the return value in this function and then return 0 or -1. >> + >> +static int get_resource(HostPCIDevice *d) >> +{ >> + int i, rc = 0; >> + FILE *f; >> + char path[PATH_MAX]; >> + unsigned long long start, end, flags, size; >> + >> + path_to(d, "resource", path, sizeof (path)); > > I think this might not fit, snprintf needs an extra byte for \0. I just checked: snprintf writes at most size bytes, including the \0, so we should just give the size of the buffer. >> + f = fopen(path, "r"); >> + if (!f) { >> + fprintf(stderr, "Error: Can't open %s: %s\n", path, >> strerror(errno)); >> + return -errno; >> + } >> + >> + for (i = 0; i < PCI_NUM_REGIONS; i++) { >> + if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) { > > People mentioned that scanf is not a good way to parse input. > Applies here. Ok, I'll do a manual parsing :(. >> + fprintf(stderr, "Error: Syntax error in %s\n", path); >> + rc = ERROR_SYNTAX; >> + break; >> + } >> + if (start) { >> + size = end - start + 1; >> + } else { >> + size = 0; >> + } >> + >> + if (i < PCI_ROM_SLOT) { >> + d->io_regions[i].base_addr = start; >> + d->io_regions[i].size = size; >> + d->io_regions[i].flags = flags; >> + } else { >> + d->rom.base_addr = start; >> + d->rom.size = size; >> + d->rom.flags = flags; >> + } >> + } >> + >> + fclose(f); >> + return rc; >> +} >> + >> +static int get_hex_value(HostPCIDevice *d, const char *name, >> + unsigned long *pvalue) > > why long? To be a bit generic I suppose, but I just use this function for vendor_id and device_id, so I probably just need an int. 
>> +{ >> + char path[PATH_MAX]; >> + FILE *f; >> + unsigned long value; >> + >> + path_to(d, name, path, sizeof (path)); >> + f = fopen(path, "r"); >> + if (!f) { >> + fprintf(stderr, "Error: Can't open %s: %s\n", path, >> strerror(errno)); >> + return -errno; >> + } >> + if (fscanf(f, "%lx\n", &value) != 1) { >> + fprintf(stderr, "Error: Syntax error in %s\n", path); >> + fclose(f); >> + return ERROR_SYNTAX; >> + } >> + fclose(f); >> + *pvalue = value; >> + return 0; >> +} >> + >> +static bool pci_dev_is_virtfn(HostPCIDevice *d) >> +{ >> + char path[PATH_MAX]; >> + struct stat buf; >> + >> + path_to(d, "physfn", path, sizeof (path)); >> + return !stat(path, &buf); >> +} >> + > > Don't start names with pci_. > It would also be better to avoid things like path_to IMO. Do you mean avoiding the name or the purpose of the function path_to ? For the name, I can probably rename it to sysfs_device_path() >> +static int host_pci_config_fd(HostPCIDevice *d) > > So this opens if needed, and returns. > Why not explicitly open on get? > then you won't need these hacks. Ok, I'll change that. >> +{ >> + char path[PATH_MAX]; >> + >> + if (d->config_fd < 0) { >> + path_to(d, "config", path, sizeof (path)); > > sizeof path > >> + d->config_fd = open(path, O_RDWR); >> + if (d->config_fd < 0) { >> + fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n", >> + path, strerror(errno)); > > strerror is not thread safe > >> + } >> + } >> + return d->config_fd; >> +} >> +static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int >> len) >> +{ >> + int fd = host_pci_config_fd(d); > > You open file on each access? > >> + int res = 0; > > why initialize here? > >> + >> +again: >> + res = pread(fd, buf, len, pos); >> + if (res != len) { >> + if (res < 0 && (errno == EINTR || errno == EAGAIN)) { >> + goto again; > > code loops with while or for. ok. 
>> + } >> + fprintf(stderr, "%s: read failed: %s (fd: %i)\n", >> + __func__, strerror(errno), fd); >> + return -errno; >> + } >> + return 0; >> +} >> +static int host_pci_config_write(HostPCIDevice *d, >> + int pos, const void *buf, int len) >> +{ >> + int fd = host_pci_config_fd(d); >> + int res = 0; >> + >> +again: >> + res = pwrite(fd, buf, len, pos); >> + if (res != len) { >> + if (res < 0 && (errno == EINTR || errno == EAGAIN)) { >> + goto again; >> + } >> + fprintf(stderr, "%s: write failed: %s\n", >> + __func__, strerror(errno)); >> + return -errno; >> + } >> + return 0; >> +} >> + > > same comments as above. also, > Don't report errors with fprintf. > >> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p) >> +{ >> + uint8_t buf; >> + int rc = host_pci_config_read(d, pos, &buf, 1); >> + if (rc == 0) { > > !rc. > >> + *p = buf; > > why not pass in p directly? > >> + } >> + return rc; >> +} >> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p) >> +{ >> + uint16_t buf; >> + int rc = host_pci_config_read(d, pos, &buf, 2); >> + if (rc == 0) { > > !rc. > >> + *p = le16_to_cpu(buf); >> + } >> + return rc; >> +} > > This looks wrong wrt endian-ness. It seems that PCI config space registers are little-endian, so a word/dword read from the PCI config space should be converted from little-endian to the CPU's endianness. >> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p) >> +{ >> + uint32_t buf; >> + int rc = host_pci_config_read(d, pos, &buf, 4); >> + if (rc == 0) { >> + *p = le32_to_cpu(buf); >> + } >> + return rc; >> +} > > Add empty lines between {} It looks nicer when I fold the functions to see only one line each :), but I will add these empty lines. >> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len) >> +{ >> + return host_pci_config_read(d, pos, buf, len); >> +} > > when would this be useful? 
It's used to initialize the "emulated" config space (of pci.h) and every time a pci config read or write is issued by the guest. >> + >> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data) >> +{ >> + return host_pci_config_write(d, pos, &data, 1); >> +} >> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data) >> +{ >> + data = cpu_to_le16(data); >> + return host_pci_config_write(d, pos, &data, 2); >> +} >> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data) >> +{ >> + data = cpu_to_le32(data); >> + return host_pci_config_write(d, pos, &data, 4); >> +} >> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len) >> +{ >> + return host_pci_config_write(d, pos, buf, len); >> +} >> + >> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap) > > Why 32? Ext config offsets are < 12 bit. No apparent reason, the user of this function was just expecting a uint32. >> +{ >> + uint32_t header = 0; >> + int max_cap = PCI_MAX_EXT_CAP; >> + int pos = PCI_CONFIG_SPACE_SIZE; >> + >> + do { >> + if (host_pci_get_long(d, pos, &header)) { >> + break; >> + } >> + /* >> + * If we have no capabilities, this is indicated by cap ID, >> + * cap version and next pointer all being 0. >> + */ >> + if (header == 0) { >> + break; >> + } >> + >> + if (PCI_EXT_CAP_ID(header) == cap) { >> + return pos; >> + } >> + >> + pos = PCI_EXT_CAP_NEXT(header); >> + if (pos < PCI_CONFIG_SPACE_SIZE) { >> + break; >> + } >> + >> + max_cap--; >> + } while (max_cap > 0); >> + >> + return 0; >> +} >> + >> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func) > > Why skip domain in the interface? > Also, HostPCIDevice structure is public so there is little value > in allocating, just get it by pointer and init/cleanup. You mean like pci_bus_new_inplace ? Ok, I'll do that. 
>> +{ >> + HostPCIDevice *d = NULL; >> + unsigned long v = 0; >> + >> + d = g_new0(HostPCIDevice, 1); >> + >> + d->config_fd = -1; >> + d->domain = 0; >> + d->bus = bus; >> + d->dev = dev; >> + d->func = func; >> + >> + if (host_pci_config_fd(d) == -1) { >> + goto error; >> + } >> + if (get_resource(d) != 0) { > > just get_resource(d). > >> + goto error; >> + } >> + >> + if (get_hex_value(d, "vendor", &v)) { >> + goto error; >> + } >> + d->vendor_id = v; >> + if (get_hex_value(d, "device", &v)) { >> + goto error; >> + } >> + d->device_id = v; >> + d->is_virtfn = pci_dev_is_virtfn(d); >> + >> + return d; >> +error: >> + if (d->config_fd >= 0) { >> + close(d->config_fd); >> + } >> + g_free(d); >> + return NULL; >> +} >> + >> +void host_pci_device_put(HostPCIDevice *d) >> +{ >> + if (d->config_fd >= 0) { >> + close(d->config_fd); >> + } >> + g_free(d); >> +} >> diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h >> new file mode 100644 >> index 0000000..c8880eb >> --- /dev/null >> +++ b/hw/host-pci-device.h >> @@ -0,0 +1,75 @@ >> +#ifndef HW_HOST_PCI_DEVICE >> +# define HW_HOST_PCI_DEVICE > > Don't put space after #. > > Also HOST_PCI_DEVICE_H would be less likely to confuse. > >> + >> +#include "pci.h" >> + >> +/* >> + * from linux/ioport.h >> + * IO resources have these defined flags. 
>> + */ >> +#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ >> + >> +#define IORESOURCE_TYPE_BITS 0x00000f00 /* Resource type */ >> +#define IORESOURCE_IO 0x00000100 >> +#define IORESOURCE_MEM 0x00000200 >> +#define IORESOURCE_IRQ 0x00000400 >> +#define IORESOURCE_DMA 0x00000800 >> + >> +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ >> +#define IORESOURCE_READONLY 0x00002000 >> +#define IORESOURCE_CACHEABLE 0x00004000 >> +#define IORESOURCE_RANGELENGTH 0x00008000 >> +#define IORESOURCE_SHADOWABLE 0x00010000 >> + >> +#define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment >> */ >> +#define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment >> */ >> + >> +#define IORESOURCE_MEM_64 0x00100000 >> + >> + /* Userland may not map this resource */ >> +#define IORESOURCE_EXCLUSIVE 0x08000000 >> +#define IORESOURCE_DISABLED 0x10000000 >> +#define IORESOURCE_UNSET 0x20000000 >> +#define IORESOURCE_AUTO 0x40000000 >> + /* Driver has marked this resource busy */ >> +#define IORESOURCE_BUSY 0x80000000 >> + > > Why do above make sense in an API? > Abstract it in some reasonable way, don't just expose > flags from sysfs as is. Ok. 
>> + > > kill extra empty lines > >> +typedef struct HostPCIIORegion { >> + unsigned long flags; >> + pcibus_t base_addr; >> + pcibus_t size; >> +} HostPCIIORegion; >> + >> +typedef struct HostPCIDevice { >> + uint16_t domain; >> + uint8_t bus; >> + uint8_t dev; >> + uint8_t func; >> + >> + uint16_t vendor_id; >> + uint16_t device_id; >> + >> + HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1]; >> + HostPCIIORegion rom; >> + >> + bool is_virtfn; >> + >> + int config_fd; >> +} HostPCIDevice; >> + >> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func); >> +void host_pci_device_put(HostPCIDevice *pci_dev); >> + >> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p); >> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p); >> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p); >> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len); >> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data); >> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data); >> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data); >> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len); >> + >> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap); >> + >> +#endif /* !HW_HOST_PCI_DEVICE */ >> -- >> Anthony PERARD > -- Anthony PERARD