The MSIx table of a PCI device is cleared in hardware after an EEH PE reset. However, QEMU still holds the stale MSIx entries, which should be cleared accordingly. Otherwise, we will run into another (recursive) EEH error, and the PCI devices contained in the PE will end up being forced offline.
The patch introduces function vfio_eeh_pe_reset(), which is called by sPAPR when asserting hot or fundamental reset, to clear stale MSIx table before EEH PE reset so that MSIx table could be restored properly after EEH PE reset. Signed-off-by: Gavin Shan <gws...@linux.vnet.ibm.com> --- hw/ppc/spapr_pci_vfio.c | 13 +++++++++---- hw/vfio/Makefile.objs | 6 +++++- hw/vfio/pci-stub.c | 16 ++++++++++++++++ hw/vfio/pci.c | 36 ++++++++++++++++++++++++++++++++++++ include/hw/vfio/vfio.h | 2 ++ 5 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 hw/vfio/pci-stub.c diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 99a1be5..6fa3afe 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -151,19 +151,24 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) switch (option) { case RTAS_SLOT_RESET_DEACTIVATE: op.op = VFIO_EEH_PE_RESET_DEACTIVATE; + ret = vfio_container_ioctl(&svphb->phb.iommu_as, + svphb->iommugroupid, + VFIO_EEH_PE_OP, &op); break; case RTAS_SLOT_RESET_HOT: - op.op = VFIO_EEH_PE_RESET_HOT; + ret = vfio_eeh_pe_reset(&svphb->phb.iommu_as, + svphb->iommugroupid, + VFIO_EEH_PE_RESET_HOT); break; case RTAS_SLOT_RESET_FUNDAMENTAL: - op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL; + ret = vfio_eeh_pe_reset(&svphb->phb.iommu_as, + svphb->iommugroupid, + VFIO_EEH_PE_RESET_FUNDAMENTAL); break; default: return RTAS_OUT_PARAM_ERROR; } - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_EEH_PE_OP, &op); if (ret < 0) { return RTAS_OUT_HW_ERROR; } diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs index e31f30e..1b8a065 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs @@ -1,4 +1,8 @@ ifeq ($(CONFIG_LINUX), y) obj-$(CONFIG_SOFTMMU) += common.o -obj-$(CONFIG_PCI) += pci.o +ifeq ($(CONFIG_PCI), y) +obj-y += pci.o +else +obj-y += pci-stub.o +endif endif diff --git a/hw/vfio/pci-stub.c b/hw/vfio/pci-stub.c new file mode 100644 index 0000000..f317c1e --- /dev/null +++ 
b/hw/vfio/pci-stub.c @@ -0,0 +1,16 @@ +/* + * To include the file on !CONFIG_PCI + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/vfio.h> + +#include "exec/memory.h" +#include "hw/vfio/vfio.h" + +int vfio_eeh_pe_reset(AddressSpace *as, int32_t groupid, uint32_t option) +{ + return -1; +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6b80539..d0fd4b4 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3319,6 +3319,42 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } +int vfio_eeh_pe_reset(AddressSpace *as, int32_t groupid, uint32_t option) +{ + VFIOGroup *group; + VFIODevice *vbasedev; + VFIOPCIDevice *vdev; + struct vfio_eeh_pe_op op = { + .argsz = sizeof(op), + .op = option + }; + + group = vfio_get_group(groupid, as); + if (!group) { + error_report("vfio: group %d not found\n", groupid); + return -1; + } + + /* + * The MSIx table will be cleaned out by reset. We need + * disable it so that it can be reenabled properly. Also, + * the cached MSIx table should be cleared as it's not + * reflecting the contents in hardware. + */ + QLIST_FOREACH(vbasedev, &group->device_list, next) { + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + if (msix_enabled(&vdev->pdev)) { + vfio_disable_msix(vdev); + } + + msix_reset(&vdev->pdev); + } + + vfio_put_group(group); + + return vfio_container_ioctl(as, groupid, VFIO_EEH_PE_OP, &op); +} + static int vfio_initfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h index 0b26cd8..52de277 100644 --- a/include/hw/vfio/vfio.h +++ b/include/hw/vfio/vfio.h @@ -5,5 +5,7 @@ extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, int req, void *param); +extern int vfio_eeh_pe_reset(AddressSpace *as, + int32_t groupid, uint32_t option); #endif -- 1.8.3.2