Add an s390x-specific callback for vfio error handling. For s390x PCI devices, we have platform-specific error information. We need to retrieve this error information for passthrough devices. This is done via a vfio device region which exposes that information.
Once this error information is retrieved we can then inject an error into the guest, and let the guest drive the recovery.

Signed-off-by: Farhan Ali <al...@linux.ibm.com>
---
 hw/s390x/s390-pci-bus.c          |   5 ++
 hw/s390x/s390-pci-vfio.c         | 100 ++++++++++++++++++++++++++++++++
 include/hw/s390x/s390-pci-bus.h  |   1 +
 include/hw/s390x/s390-pci-vfio.h |   2 +
 4 files changed, 108 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index f87d2748b6..af42eb9938 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -158,6 +158,8 @@ static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
 {
     HotplugHandler *hotplug_ctrl;
 
+    qemu_mutex_destroy(&pbdev->err_handler_lock);
+
     if (pbdev->pft == ZPCI_PFT_ISM) {
         notifier_remove(&pbdev->shutdown_notifier);
     }
@@ -1140,6 +1142,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         pbdev->iommu->pbdev = pbdev;
         pbdev->state = ZPCI_FS_DISABLED;
         set_pbdev_info(pbdev);
+        qemu_mutex_init(&pbdev->err_handler_lock);
 
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
             /*
@@ -1164,6 +1167,8 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
             pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
             /* Fill in CLP information passed via the vfio region */
             s390_pci_get_clp_info(pbdev);
+            /* Setup error handler for error recovery */
+            s390_pci_setup_err_handler(pbdev);
             if (!pbdev->interp) {
                 /* Do vfio passthrough but intercept for I/O */
                 pbdev->fh |= FH_SHM_VFIO;
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index aaf91319b4..ceee342f4a 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
@@ -103,6 +104,85 @@ void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
     }
 }
 
+/*
+ * Read the zPCI error region of a passthrough device.
+ *
+ * Looks up the IBM vendor-specific error region, reads it in full from
+ * the device fd and copies the leading sizeof(*err) bytes into @err.
+ *
+ * Returns 0 on success, the region lookup's error code or -EINVAL on
+ * failure; @err is only written on success.
+ */
+static int s390_pci_read_error_region(VFIOPCIDevice *vfio_pci,
+                            struct vfio_device_zpci_err_region *err)
+{
+    struct vfio_region_info *region = NULL;
+    /* Must start out NULL: auto-freed on every return, even early ones */
+    g_autofree void *buf = NULL;
+    ssize_t nread;
+    int ret;
+
+    ret = vfio_device_get_region_info_type(&vfio_pci->vbasedev,
+                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_IBM,
+                    VFIO_REGION_SUBTYPE_IBM_ZPCI_ERROR_REGION, &region);
+    if (ret) {
+        error_report("Failed to get the region info for passthrough device"
+                     " (rc=%d)", ret);
+        return ret;
+    }
+
+    /* The memcpy() below reads sizeof(*err) bytes out of the buffer */
+    if (region->size < sizeof(*err)) {
+        error_report("vfio zpci error region is too small");
+        return -EINVAL;
+    }
+
+    buf = g_malloc0(region->size);
+
+    nread = pread(vfio_pci->vbasedev.fd, buf, region->size, region->offset);
+    if (nread < 0 || (uint64_t)nread != region->size) {
+        error_report("Failed to read vfio zpci error region");
+        return -EINVAL;
+    }
+
+    memcpy(err, buf, sizeof(*err));
+    return 0;
+}
+
+/*
+ * vfio arch error callback: read the error region of the passthrough
+ * device and forward every reported error to the guest.
+ */
+static void s390_pci_err_handler(VFIOPCIDevice *vfio_pci)
+{
+    S390PCIBusDevice *pbdev;
+    struct vfio_device_zpci_err_region err;
+
+    pbdev = s390_pci_find_dev_by_target(s390_get_phb(),
+                                        DEVICE(&vfio_pci->pdev)->id);
+    if (!pbdev) {
+        error_report("No zPCI device found for passthrough device");
+        return;
+    }
+
+    QEMU_LOCK_GUARD(&pbdev->err_handler_lock);
+
+    if (s390_pci_read_error_region(vfio_pci, &err)) {
+        return;
+    }
+
+    pbdev->state = ZPCI_FS_ERROR;
+    s390_pci_generate_error_event(err.pec, pbdev->fh, pbdev->fid, 0, 0);
+
+    /* Keep draining while the region reports further pending errors */
+    while (err.pending_errors) {
+        if (s390_pci_read_error_region(vfio_pci, &err)) {
+            return;
+        }
+        s390_pci_generate_error_event(err.pec, pbdev->fh, pbdev->fid, 0, 0);
+    }
+}
+
 static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
 {
@@ -369,3 +449,23 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
     s390_pci_read_util(pbdev, info);
     s390_pci_read_pfip(pbdev, info);
 }
+
+/*
+ * Install the s390x error handler on a vfio-pci passthrough device, but
+ * only if the device exposes the zPCI error region.
+ */
+void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev)
+{
+    int ret;
+    struct vfio_region_info *region = NULL;
+    VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+    ret = vfio_device_get_region_info_type(&vdev->vbasedev,
+                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_IBM,
+                    VFIO_REGION_SUBTYPE_IBM_ZPCI_ERROR_REGION, &region);
+    if (ret) {
+        info_report("Automated error recovery not available for passthrough device");
+        return;
+    }
+    vdev->arch_err_handler = s390_pci_err_handler;
+}
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 04944d4fed..3795e0bbfc 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -364,6 +364,7 @@ struct S390PCIBusDevice {
     bool forwarding_assist;
     bool aif;
     bool rtr_avail;
+    QemuMutex err_handler_lock;
     QTAILQ_ENTRY(S390PCIBusDevice) link;
 };
 
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
index ae1b126ff7..66b274293c 100644
--- a/include/hw/s390x/s390-pci-vfio.h
+++ b/include/hw/s390x/s390-pci-vfio.h
@@ -22,6 +22,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
 void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh);
 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
+void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev);
 #else
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
 {
@@ -39,6 +40,7 @@ static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
     return false;
 }
 static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
+static inline void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev) { }
 #endif
 
 #endif
-- 
2.43.0