On 07/24/2014 11:51 AM, Eric Auger wrote: > On 07/09/2014 12:43 AM, Alex Williamson wrote: >> On Mon, 2014-07-07 at 13:27 +0100, Eric Auger wrote: >>> vfio_get_device now takes a VFIODevice as argument. The function is split >>> into 4 functional parts: dev_info query, device check, region populate >>> and interrupt populate. the last 3 are specialized by parent device and >>> are added into DeviceOps. >>> >>> 3 new fields are introduced in VFIODevice to store dev_info. >>> >>> vfio_put_base_device is created. >>> >>> Signed-off-by: Eric Auger <eric.au...@linaro.org> >>> --- >>> hw/vfio/pci.c | 181 >>> +++++++++++++++++++++++++++++++++++++++------------------- >>> 1 file changed, 121 insertions(+), 60 deletions(-) >>> >>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >>> index 5f0164a..d228cf8 100644 >>> --- a/hw/vfio/pci.c >>> +++ b/hw/vfio/pci.c >>> @@ -194,12 +194,18 @@ typedef struct VFIODevice { >>> bool reset_works; >>> bool needs_reset; >>> VFIODeviceOps *ops; >>> + unsigned int num_irqs; >>> + unsigned int num_regions; >>> + unsigned int flags; >>> } VFIODevice; >>> >>> struct VFIODeviceOps { >>> bool (*vfio_compute_needs_reset)(VFIODevice *vdev); >>> int (*vfio_hot_reset_multi)(VFIODevice *vdev); >>> void (*vfio_eoi)(VFIODevice *vdev); >>> + int (*vfio_check_device)(VFIODevice *vdev); >>> + int (*vfio_populate_regions)(VFIODevice *vdev); >>> + int (*vfio_populate_interrupts)(VFIODevice *vdev); >>> }; >>> >>> typedef struct VFIOPCIDevice { >>> @@ -286,6 +292,10 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, >>> uint32_t addr, int len); >>> static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, >>> uint32_t val, int len); >>> static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); >>> +static void vfio_put_base_device(VFIODevice *vbasedev); >>> +static int vfio_check_device(VFIODevice *vbasedev); >>> +static int vfio_populate_regions(VFIODevice *vbasedev); >>> +static int vfio_populate_interrupts(VFIODevice *vbasedev); >>> >>> /* >>> * Common VFIO interrupt disable >>> @@ -3585,6 +3595,9 @@ static VFIODeviceOps vfio_pci_ops = { >>> .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, >>> .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, >>> .vfio_eoi = vfio_eoi, >>> + .vfio_check_device = vfio_check_device, >>> + .vfio_populate_regions = vfio_populate_regions, >>> + .vfio_populate_interrupts = vfio_populate_interrupts, >>> }; >>> >>> static void vfio_reset_handler(void *opaque) >>> @@ -3927,54 +3940,53 @@ static void vfio_put_group(VFIOGroup *group) >>> } >>> } >>> >>> -static int vfio_get_device(VFIOGroup *group, const char *name, >>> - VFIOPCIDevice *vdev) >>> +static int vfio_check_device(VFIODevice *vbasedev) >>> { >>> - struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; >>> - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; >>> - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; >>> - int ret, i; >>> - >>> - ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); >>> - if (ret < 0) { >>> - error_report("vfio: error getting device %s from group %d: %m", >>> - name, group->groupid); >>> - error_printf("Verify all devices in group %d are bound to vfio-pci >>> " >>> - "or pci-stub and not already in use\n", >>> group->groupid); >>> - return ret; >>> + if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PCI)) { >>> + error_report("vfio: Um, this isn't a PCI device"); >>> + goto error; >>> } >>> - >>> - vdev->vbasedev.fd = ret; >>> - vdev->vbasedev.group = group; >>> - QLIST_INSERT_HEAD(&group->device_list, &vdev->vbasedev, next); >>> - >>> - /* Sanity check device */ >>> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info); >>> - if (ret) { >>> - error_report("vfio: error getting device info: %m"); >>> + if (vbasedev->num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { >>> + error_report("vfio: unexpected number of io regions %u", >>> + vbasedev->num_regions); >>> goto error; >>> } >>> - >>> - DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name, >>> - dev_info.flags, dev_info.num_regions, dev_info.num_irqs); >>> - >>> - if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) { >>> - error_report("vfio: Um, this isn't a PCI device"); >>> + if (vbasedev->num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { >>> + error_report("vfio: unexpected number of irqs %u", >>> + vbasedev->num_irqs); >>> goto error; >>> } >>> + return 0; >>> +error: >>> + vfio_put_base_device(vbasedev); >> >> This doesn't make much sense, this function never "got" the base device, >> so why does it need to "put" it on error? We should simply return error >> and the caller (presumably who got it) should put the device. > Hi Alex, > > definitively I need to revisit and homogenize my error handling: all > sub-functions just returning errors - if sensible- , get/put at upper > level. errno misusage. Sorry for that :-( >> >>> + return -errno; >> >> Nothing above seems to guarantee we have anything useful in errno (or >> that it hasn't been clobbered). > replaced by -1 >> >>> +} >>> >>> - vdev->vbasedev.reset_works = !!(dev_info.flags & >>> VFIO_DEVICE_FLAGS_RESET); >>> +static int vfio_populate_interrupts(VFIODevice *vbasedev) >>> +{ >>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >>> + int ret; >>> + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; >>> + irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; >>> >>> - if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { >>> - error_report("vfio: unexpected number of io regions %u", >>> - dev_info.num_regions); >>> - goto error; >>> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); >>> + if (ret) { >>> + /* This can fail for an old kernel or legacy PCI dev */ >>> + DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n"); >>> + } else if (irq_info.count == 1) { >>> + vdev->pci_aer = true; >>> + } else { >>> + error_report("vfio: %s Could not enable error recovery for the >>> device", >>> + vbasedev->name); >>> } >>> + return ret; >> >> This function returns error if the device doesn't support error >> reporting, which is an optional feature. I don't think that's what we >> want. > OK misunderstood the comment. function proto changed to return void. after some more thoughts related to platform usage I would like to keep the return value and set it to 0 for PCI.
BR Eric >> >>> +} >>> >>> - if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { >>> - error_report("vfio: unexpected number of irqs %u", >>> dev_info.num_irqs); >>> - goto error; >>> - } >>> +static int vfio_populate_regions(VFIODevice *vbasedev) >>> +{ >>> + struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; >>> + int i, ret; >>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >>> >>> for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; >>> i++) { >>> reg_info.index = i; >>> @@ -4018,7 +4030,7 @@ static int vfio_get_device(VFIOGroup *group, const >>> char *name, >>> vdev->config_offset = reg_info.offset; >>> >>> if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && >>> - dev_info.num_regions > VFIO_PCI_VGA_REGION_INDEX) { >>> + vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { >>> struct vfio_region_info vga_info = { >>> .argsz = sizeof(vga_info), >>> .index = VFIO_PCI_VGA_REGION_INDEX, >>> @@ -4057,38 +4069,87 @@ static int vfio_get_device(VFIOGroup *group, const >>> char *name, >>> >>> vdev->has_vga = true; >>> } >>> - irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; >>> + return 0; >>> +error: >>> + vfio_put_base_device(vbasedev); >>> + return -errno; >> >> errno can get clobbered by here, don't count on it. Also, why does this >> put the base device while interrupt_populate error does not? The put >> needs to happen a level above these functions imho. > > ok >> >>> +} >>> + >>> +static int vfio_get_device(VFIOGroup *group, const char *name, >>> + VFIODevice *vbasedev) >>> +{ >>> + struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; >>> + struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; >>> + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; >>> + int ret; >>> + >>> + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); >>> + if (ret < 0) { >>> + error_report("vfio: error getting device %s from group %d: %m", >>> + name, group->groupid); >>> + error_printf("Verify all devices in group %d are bound to vfio-pci >>> " >>> + "or pci-stub and not already in use\n", >>> group->groupid); >>> + return ret; >>> + } >>> + >>> + vbasedev->fd = ret; >>> + vbasedev->group = group; >>> + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); >>> >>> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); >>> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_INFO, &dev_info); >>> if (ret) { >>> - /* This can fail for an old kernel or legacy PCI dev */ >>> - DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n"); >>> - ret = 0; >>> - } else if (irq_info.count == 1) { >>> - vdev->pci_aer = true; >>> - } else { >>> - error_report("vfio: %04x:%02x:%02x.%x " >>> - "Could not enable error recovery for the device", >>> - vdev->host.domain, vdev->host.bus, vdev->host.slot, >>> - vdev->host.function); >>> + error_report("vfio: error getting device info: %m"); >>> + goto error; >>> + } >>> + >>> + vbasedev->num_irqs = dev_info.num_irqs; >>> + vbasedev->num_regions = dev_info.num_regions; >>> + vbasedev->flags = dev_info.flags; >>> + >>> + DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name, >>> + dev_info.flags, dev_info.num_regions, dev_info.num_irqs); >>> + >>> + vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); >>> + >>> + /* call device specific functions */ >>> + ret = vbasedev->ops->vfio_check_device(vbasedev); >>> + if (ret) { >>> + error_report("vfio: error when checking device %s\n", >>> + vbasedev->name); >>> + goto error; >>> + } >>> + ret = vbasedev->ops->vfio_populate_regions(vbasedev); >>> + if (ret) { >>> + error_report("vfio: error when populating regions of device %s\n", >>> + vbasedev->name); >>> + goto error; >>> + } >>> + ret = vbasedev->ops->vfio_populate_interrupts(vbasedev); >>> + if (ret) { >>> + error_report("vfio: error when populating interrupts of device >>> %s\n", >>> + vbasedev->name); >>> + goto error; >>> } >>> >>> error: >>> if (ret) { >>> - QLIST_REMOVE(&vdev->vbasedev, next); >>> - vdev->vbasedev.group = NULL; >>> - close(vdev->vbasedev.fd); >>> + vfio_put_base_device(vbasedev); >> >> Whoops, more confusion, the call-out functions are doing put calls >> (well, some of them) and so is this. This is the only place it should >> occur. > OK >> >>> } >>> return ret; >>> } >>> >>> -static void vfio_put_device(VFIOPCIDevice *vdev) >>> +void vfio_put_base_device(VFIODevice *vbasedev) >>> { >>> - QLIST_REMOVE(&vdev->vbasedev, next); >>> - vdev->vbasedev.group = NULL; >>> + QLIST_REMOVE(vbasedev, next); >>> + vbasedev->group = NULL; >>> DPRINTF("vfio_put_device: close vdev->fd\n"); >>> - close(vdev->vbasedev.fd); >>> - g_free(vdev->vbasedev.name); >>> + close(vbasedev->fd); >>> + g_free(vbasedev->name); >> >> get/put of the base device is still a bit messy. .name doesn't get >> allocated by the get, but gets freed by the put. > > .name dealloc moved to vfio_put_device > > Thank you for your review > > Best Regards > > Eric >> >>> +} >>> + >>> +static void vfio_put_device(VFIOPCIDevice *vdev) >>> +{ >>> + vfio_put_base_device(&vdev->vbasedev); >>> if (vdev->msix) { >>> g_free(vdev->msix); >>> vdev->msix = NULL; >>> @@ -4266,7 +4327,7 @@ static int vfio_initfn(PCIDevice *pdev) >>> } >>> } >>> >>> - ret = vfio_get_device(group, path, vdev); >>> + ret = vfio_get_device(group, path, &vdev->vbasedev); >>> if (ret) { >>> error_report("vfio: failed to get device %s", path); >>> vfio_put_group(group); >> >> >> >