On Thu, Mar 26, 2015 at 04:35:01PM +1100, Gavin Shan wrote: > The PCI device MSIx table is cleaned out in hardware after EEH PE > reset. However, we still hold the stale MSIx entries in QEMU, which > should be cleared accordingly. Otherwise, we will run into another > (recursive) EEH error and the PCI devices contained in the PE have > to be offlined exceptionally. > > The patch introduces function vfio_eeh_pe_reset(), which is called > by sPAPR when asserting hot or fundamental reset, to clear stale MSIx > table before EEH PE reset so that MSIx table could be restored properly > after EEH PE reset. > > Signed-off-by: Gavin Shan <gws...@linux.vnet.ibm.com> > --- > hw/ppc/spapr_pci_vfio.c | 13 +++++++++---- > hw/vfio/Makefile.objs | 6 +++++- > hw/vfio/pci-stub.c | 16 ++++++++++++++++ > hw/vfio/pci.c | 36 ++++++++++++++++++++++++++++++++++++ > include/hw/vfio/vfio.h | 2 ++ > 5 files changed, 68 insertions(+), 5 deletions(-) > create mode 100644 hw/vfio/pci-stub.c > > diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c > index 99a1be5..6fa3afe 100644 > --- a/hw/ppc/spapr_pci_vfio.c > +++ b/hw/ppc/spapr_pci_vfio.c > @@ -151,19 +151,24 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState > *sphb, int option) > switch (option) { > case RTAS_SLOT_RESET_DEACTIVATE: > op.op = VFIO_EEH_PE_RESET_DEACTIVATE; > + ret = vfio_container_ioctl(&svphb->phb.iommu_as, > + svphb->iommugroupid, > + VFIO_EEH_PE_OP, &op);
For consistency, I think all the reset operations should go through vfio_eeh_pe_reset(), even though in this case it won't do more than call vfio_container_ioctl().

> break; > case RTAS_SLOT_RESET_HOT: > - op.op = VFIO_EEH_PE_RESET_HOT; > + ret = vfio_eeh_pe_reset(&svphb->phb.iommu_as, > + svphb->iommugroupid, > + VFIO_EEH_PE_RESET_HOT); > break; > case RTAS_SLOT_RESET_FUNDAMENTAL: > - op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL; > + ret = vfio_eeh_pe_reset(&svphb->phb.iommu_as, > + svphb->iommugroupid, > + VFIO_EEH_PE_RESET_FUNDAMENTAL); > break; > default: > return RTAS_OUT_PARAM_ERROR; > } > > - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, > - VFIO_EEH_PE_OP, &op); > if (ret < 0) { > return RTAS_OUT_HW_ERROR; > } > diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs > index e31f30e..1b8a065 100644 > --- a/hw/vfio/Makefile.objs > +++ b/hw/vfio/Makefile.objs > @@ -1,4 +1,8 @@ > ifeq ($(CONFIG_LINUX), y) > obj-$(CONFIG_SOFTMMU) += common.o > -obj-$(CONFIG_PCI) += pci.o > +ifeq ($(CONFIG_PCI), y) > +obj-y += pci.o > +else > +obj-y += pci-stub.o > +endif > endif > diff --git a/hw/vfio/pci-stub.c b/hw/vfio/pci-stub.c > new file mode 100644 > index 0000000..f317c1e > --- /dev/null > +++ b/hw/vfio/pci-stub.c > @@ -0,0 +1,16 @@ > +/* > + * To include the file on !CONFIG_PCI > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#include <linux/vfio.h> > + > +#include "exec/memory.h" > +#include "hw/vfio/vfio.h" > + > +int vfio_eeh_pe_reset(AddressSpace *as, int32_t groupid, uint32_t option) > +{ > + return -1;

Probably should have assert(0) here - this should never be called if !CONFIG_PCI.

> +} > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 6b80539..d0fd4b4 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -3319,6 +3319,42 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice > *vdev) > vdev->req_enabled = false; > } > > +int vfio_eeh_pe_reset(AddressSpace *as, int32_t groupid, uint32_t option) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + VFIOPCIDevice *vdev; > + struct vfio_eeh_pe_op op = { > + .argsz = sizeof(op), > + .op = option > + }; > + > + group = vfio_get_group(groupid, as); > + if (!group) { > + error_report("vfio: group %d not found\n", groupid); > + return -1; > + } > + > + /* > + * The MSIx table will be cleaned out by reset. We need > + * disable it so that it can be reenabled properly. Also, > + * the cached MSIx table should be cleared as it's not > + * reflecting the contents in hardware. > + */ > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + if (msix_enabled(&vdev->pdev)) { > + vfio_disable_msix(vdev); > + } > + > + msix_reset(&vdev->pdev); > + } > + > + vfio_put_group(group); > + > + return vfio_container_ioctl(as, groupid, VFIO_EEH_PE_OP, &op); > +} > + > static int vfio_initfn(PCIDevice *pdev) > { > VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h > index 0b26cd8..52de277 100644 > --- a/include/hw/vfio/vfio.h > +++ b/include/hw/vfio/vfio.h > @@ -5,5 +5,7 @@ > > extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > int req, void *param); > +extern int vfio_eeh_pe_reset(AddressSpace *as, > + int32_t groupid, uint32_t option); > > #endif -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
pgpmdplvgjjFc.pgp
Description: PGP signature