On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote: > From: Chen Fan <chen.fan.f...@cn.fujitsu.com> > > Particularly, For vfio devices, Once need to recovery devices > by bus reset such as AER, we always need to reset the host bus > to recovery the devices under the bus, so we need to add pci device > callbacks to specify to do host bus reset. > > Signed-off-by: Chen Fan <chen.fan.f...@cn.fujitsu.com> > Reviewed-by: Michael S. Tsirkin <m...@redhat.com> > --- > hw/pci/pci.c | 18 ++++++++++++++++++ > hw/pci/pci_bridge.c | 9 +++++++++ > hw/vfio/pci.c | 26 ++++++++++++++++++++++++++ > hw/vfio/pci.h | 2 ++ > include/hw/pci/pci.h | 7 +++++++ > 5 files changed, 62 insertions(+) > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index f6ca6ef..64fa2cc 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev) > msix_reset(dev); > } > > +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused) > +{ > + PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev); > + > + if (dc->pre_reset) { > + dc->pre_reset(dev); > + } > +} > + > +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void > *unused) > +{ > + PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev); > + > + if (dc->post_reset) { > + dc->post_reset(dev); > + } > +} > + > /* > * This function is called on #RST and FLR. > * FLR if PCI_EXP_DEVCTL_BCR_FLR is set > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c > index 40c97b1..ddb76ab 100644 > --- a/hw/pci/pci_bridge.c > +++ b/hw/pci/pci_bridge.c > @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d, > > newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); > if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) { > + /* > + * Notify all vfio-pci devices under the bus > + * should do physical bus reset. > + */ > + PCIBus *sec_bus = pci_bridge_get_sec_bus(s); > + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), > + pci_device_pre_reset, NULL); > /* Trigger hot reset on 0->1 transition. */ > qbus_reset_all(&s->sec_bus.qbus); > + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), > + pci_device_post_reset, NULL); > } > } > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index e17dc89..df32618 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -39,6 +39,7 @@ > > static void vfio_disable_interrupts(VFIOPCIDevice *vdev); > static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool > enabled); > +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single); > > /* > * Disabling BAR mmaping can be slow, but toggling it around INTx > can > @@ -1888,6 +1889,8 @@ static int > vfio_check_host_bus_reset(VFIOPCIDevice *vdev) > /* List all affected devices by bus reset */ > devices = &info->devices[0]; > > + vdev->single_depend_dev = (info->count == 1); > + > /* Verify that we have all the groups required */ > for (i = 0; i < info->count; i++) { > PCIHostDeviceAddress host; > @@ -2029,10 +2032,26 @@ static int > vfio_check_bus_reset(NotifierWithReturn *n, void *opaque) > return vfio_check_host_bus_reset(vdev); > } > > +static void vfio_aer_pre_reset(PCIDevice *pdev) > +{ > + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > + > + vdev->aer_reset = true; > + vfio_pci_hot_reset(vdev, vdev->single_depend_dev); > +}
Doesn't this lead to multiple host bus resets per guest bus reset in many cases? It looks like we'll do it once per vfio-pci device, even if those devices are on the same host bus. That's a 1 second operation per device. Can we avoid that? Maybe some sort of sequence ID could help a device figure out whether it's already been reset as part of a dependent device for this particular guest bus reset. Thanks, Alex > + > +static void vfio_aer_post_reset(PCIDevice *pdev) > +{ > + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > + > + vdev->aer_reset = false; > +} > + > static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > int pos, uint16_t size) > { > PCIDevice *pdev = &vdev->pdev; > + PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev); > PCIDevice *dev_iter; > uint8_t type; > uint32_t errcap; > @@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, > uint8_t cap_ver, > vdev->hotplug_notifier.notify = vfio_check_bus_reset; > pci_bus_add_hotplug_notifier(pdev->bus, &vdev- > >hotplug_notifier); > > + dc->pre_reset = vfio_aer_pre_reset; > + dc->post_reset = vfio_aer_post_reset; > + > pcie_cap_deverr_init(pdev); > return pcie_aer_init(pdev, pos, size); > > @@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev) > > trace_vfio_pci_reset(vdev->vbasedev.name); > > + if (vdev->aer_reset) { > + return; > + } > + > vfio_pci_pre_reset(vdev); > > if (vdev->resetfn && !vdev->resetfn(vdev)) { > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index b385f07..5470b97 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice { > bool no_kvm_intx; > bool no_kvm_msi; > bool no_kvm_msix; > + bool aer_reset; > + bool single_depend_dev; > > NotifierWithReturn hotplug_notifier; > } VFIOPCIDevice; > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index 379b6e1..6b1f2d4 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice > *pci_dev, int region_num, > pcibus_t addr, pcibus_t size, int > type); > typedef void PCIUnregisterFunc(PCIDevice *pci_dev); > > +typedef void PCIPreResetFunc(PCIDevice *pci_dev); > +typedef void PCIPostResetFunc(PCIDevice *pci_dev); > + > typedef struct PCIIORegion { > pcibus_t addr; /* current PCI mapping address. -1 means not > mapped */ > #define PCI_BAR_UNMAPPED (~(pcibus_t)0) > @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass { > PCIUnregisterFunc *exit; > PCIConfigReadFunc *config_read; > PCIConfigWriteFunc *config_write; > + PCIPreResetFunc *pre_reset; > + PCIPostResetFunc *post_reset; > > uint16_t vendor_id; > uint16_t device_id; > @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, > PCIINTxRoute *new); > void pci_bus_fire_intx_routing_notifier(PCIBus *bus); > void pci_device_set_intx_routing_notifier(PCIDevice *dev, > PCIINTxRoutingNotifier > notifier); > +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque); > +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque); > void pci_device_reset(PCIDevice *dev); > > PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,