On Wed, 2015-06-24 at 17:46 +0800, Chen Fan wrote: > when init vfio devices done, we should test all the devices supported > aer whether conflict with others. For each one, get the hot reset > info for the affected device list. For each affected device, all > should attach to the VM and on the same slot. also, we should test > all of the non-AER supporting vfio-pci devices on or below the target > bus to verify they have a reset mechanism. > > Signed-off-by: Chen Fan <chen.fan.f...@cn.fujitsu.com> > --- > hw/vfio/pci.c | 213 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 206 insertions(+), 7 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index e1bbd03..c2af8f1 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -32,6 +32,7 @@ > #include "hw/pci/msi.h" > #include "hw/pci/msix.h" > #include "hw/pci/pci.h" > +#include "hw/pci/pci_bus.h" > #include "hw/pci/pci_bridge.h" > #include "qemu-common.h" > #include "qemu/error-report.h" > @@ -2822,6 +2823,184 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, > uint8_t pos) > return 0; > } > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (host1->domain == host2->domain && host1->bus == host2->bus && > + host1->slot == host2->slot && host1->function == > host2->function); > +} > + > +struct VFIODeviceFind { > + PCIDevice *pdev; > + bool found; > +}; > + > +static void vfio_check_device_reset(PCIBus *bus, void *opaque) > +{ > + int i; > + PCIDevice *dev; > + VFIOPCIDevice *vdev; > + struct VFIODeviceFind *find = opaque; > + > + if (find->found) { > + return; > + } > + > + for (i = 0; i < ARRAY_SIZE(bus->devices); i++) { > + if (!bus->devices[i]) { > + continue; > + } > + dev = bus->devices[i]; > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > + continue; > + } > + vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev); > + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) && > + !vdev->vbasedev.reset_works) { > + find->pdev = dev; > + find->found = true; > + break; > + } > + } > +} > + > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > +{ > + PCIBus *bus = vdev->pdev.bus; > + PCIDevice *pdev = &vdev->pdev; > + struct vfio_pci_hot_reset_info *info = NULL; > + struct vfio_pci_dependent_device *devices; > + VFIOGroup *group; > + struct VFIODeviceFind find; > + bool hotplugged = DEVICE(vdev)->hotplugged; > + int ret, i; > + > + ret = vfio_get_hot_reset_info(vdev, &info); > + if (ret) { > + error_report("vfio: Cannot get hot reset info"); > + goto out;
nit, we never need to free info if this errors, so we could simply 'return ret' here. Maybe you're trying to consolidate the error path since it's safe to free(NULL). > + } > + > + /* we only support hotplug device is single function */ > + if (hotplugged && info->count > 1) { > + error_report("vfio: Cannot enable AER for device %s, " > + "hotplug device only support single function.", > + vdev->vbasedev.name); For nearly all practical purposes, this means we don't support hot-add of vfio-pci for devices with aer=on; the majority of devices are multi-function. If we're willing to eliminate all hot-add except for single function devices, I question the value of what we're doing here. Hot-add of multi-function PCI devices is currently a deficiency in QEMU, and it probably needs to be solved in order for this approach to be viable. > + ret = -1; > + goto out; > + } > + > + /* List all affected devices by bus reset */ > + devices = &info->devices[0]; > + > + /* Verify that we have all the groups required */ > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + bool found = false; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + /* Skip the current device */ > + if (vfio_pci_host_match(&host, &vdev->host)) { > + continue; > + } > + > + /* Ensure we own the group of the affected device */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + if (group->groupid == devices[i].group_id) { > + break; > + } > + } > + > + if (!group) { > + error_report("vfio: Cannot enable AER for device %s, " > + "depends on group %d which is not owned.", > + vdev->vbasedev.name, devices[i].group_id); Printing the device associated with that group would likely be helpful as well. > + ret = -1; > + goto out; > + } > + > + /* Ensure affected devices for reset on the same slot */ > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); > + if (vfio_pci_host_match(&host, &tmp->host)) { > + PCIDevice *pci = PCI_DEVICE(tmp); > + > + if (pci->bus == pdev->bus && > + PCI_SLOT(pci->devfn) == PCI_SLOT(pdev->devfn)) { > + found = true; nit, we could actually put the 'break' here and pull the below code out of the /else/ block out. > + } else { > + error_report("vfio: Cannot enable AER for device %s, " > + "the dependent device %s is not on the same > slot", > + vdev->vbasedev.name, tmp->vbasedev.name); > + ret = -1; > + goto out; > + } > + break; > + } > + } > + > + /* Ensure all affected devices assigned to VM */ > + if (!found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the dependent device %04x:%02x:%02x.%x " > + "is not assigned to VM.", > + vdev->vbasedev.name, host.domain, host.bus, > + host.slot, host.function); > + ret = -1; > + goto out; > + } > + } > + > + /* > + * Check the all vfio pci devices on or below the target bus > + * have a reset mechanism at least. > + */ > + find.pdev = NULL; > + find.found = false; > + pci_for_each_bus(bus, vfio_check_device_reset, &find); > + if (find.found) { > + error_report("vfio: Cannot enable AER for device %s, " > + "the affected device %s have not a reset mechanism.", > + vdev->vbasedev.name, find.pdev->name); > + ret = -1; > + goto out; > + } Hmm, don't the rules change here, why do we need to allow any non-dependent device in the same slot? I think we can also require that all of those devices support aer=on. The proper english for the error message would be s/have not/does not have/ > + > + ret = 0; > +out: > + g_free(info); > + return ret; > +} > + > +static int vfio_check_devices_host_bus_reset(void) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + VFIOPCIDevice *vdev; > + > + /* Check All vfio-pci devices if have bus reset capability */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { Why do we use 'vbasedev_iter' in the previous function and 'vbasedev' here? > + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); Needs a: if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { continue; } > + if ((vdev->features & VFIO_FEATURE_ENABLE_AER) && > + vfio_check_host_bus_reset(vdev)) { > + return -1; > + } > + } > + } > + > + return 0; > +} > + > static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > int pos, uint16_t size) > { > @@ -2858,6 +3037,14 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t > cap_ver, > dev_iter = pci_bridge_get_device(dev_iter->bus); > } > > + if (DEVICE(vdev)->hotplugged) { > + /* Make sure this device does not conflict the existing aer topology > */ > + ret = vfio_check_devices_host_bus_reset(); > + if (ret) { > + return ret; > + } > + } > + > errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4); > /* > * The ability to record multiple headers is depending on > @@ -3013,13 +3200,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) > vfio_enable_intx(vdev); > } > > -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > - PCIHostDeviceAddress *host2) > -{ > - return (host1->domain == host2->domain && host1->bus == host2->bus && > - host1->slot == host2->slot && host1->function == > host2->function); > -} > - > static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) > { > VFIOGroup *group; > @@ -3681,6 +3861,20 @@ static void vfio_setup_resetfn(VFIOPCIDevice *vdev) > } > } > > +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) > +{ > + int ret; > + > + ret = vfio_check_devices_host_bus_reset(); > + if (ret) { > + exit(1); This should probably be a hw_error() call. Thanks, Alex > + } > +} > + > +static Notifier machine_notifier = { > + .notify = vfio_pci_machine_done_notify, > +}; > + > static int vfio_initfn(PCIDevice *pdev) > { > VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > @@ -3966,6 +4160,11 @@ static const TypeInfo vfio_pci_dev_info = { > static void register_vfio_pci_dev_type(void) > { > type_register_static(&vfio_pci_dev_info); > + /* > + * Register notifier when machine init is done, since we need > + * check the configration manner after all vfio device are inited. > + */ > + qemu_add_machine_init_done_notifier(&machine_notifier); > } > > type_init(register_vfio_pci_dev_type)