>-----Original Message----- >From: Eric Auger <eric.au...@redhat.com> >Sent: Friday, November 17, 2023 9:54 PM >Subject: Re: [PATCH v6 09/21] vfio/iommufd: Enable pci hot reset through >iommufd cdev interface > > > >On 11/14/23 11:09, Zhenzhong Duan wrote: >> Add a new callback iommufd_cdev_pci_hot_reset to do iommufd specific >> check and reset operation. > >nit: Implement the newly introduced pci_hot_reset callback?
Yes >> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> v6: pci_hot_reset return -errno if fails >> >> hw/vfio/iommufd.c | 145 >+++++++++++++++++++++++++++++++++++++++++++ >> hw/vfio/trace-events | 1 + >> 2 files changed, 146 insertions(+) >> >> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >> index e5bf528e89..3eec428162 100644 >> --- a/hw/vfio/iommufd.c >> +++ b/hw/vfio/iommufd.c >> @@ -24,6 +24,7 @@ >> #include "sysemu/reset.h" >> #include "qemu/cutils.h" >> #include "qemu/chardev_open.h" >> +#include "pci.h" >> >> static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, >> ram_addr_t size, void *vaddr, bool readonly) >> @@ -473,9 +474,153 @@ static void iommufd_cdev_detach(VFIODevice >*vbasedev) >> close(vbasedev->fd); >> } >> >> +static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) >> +{ >> + VFIODevice *vbasedev_iter; >> + >> + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { >> + if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { >> + continue; >> + } >> + if (devid == vbasedev_iter->devid) { >> + return vbasedev_iter; >> + } >> + } >> + return NULL; >> +} >> + >> +static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) >> +{ >> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >> + struct vfio_pci_hot_reset_info *info = NULL; >> + struct vfio_pci_dependent_device *devices; >> + struct vfio_pci_hot_reset *reset; >> + int ret, i; >> + bool multi = false; >> + >> + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); >> + >> + if (!single) { >> + vfio_pci_pre_reset(vdev); >> + } >> + vdev->vbasedev.needs_reset = false; >> + >> + ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); >> + >> + if (ret) { >> + goto out_single; >> + } >> + >> + assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); >> + >> + devices = &info->devices[0]; >> + >> + if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { >> + if (!vdev->has_pm_reset) { >> + for (i = 0; i < info->count; i++) { >> + if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { >> + error_report("vfio: Cannot reset device %s, " >> + "depends on device %04x:%02x:%02x.%x " >> + "which is not owned.", >> + vdev->vbasedev.name, devices[i].segment, >> + devices[i].bus, PCI_SLOT(devices[i].devfn), >> + PCI_FUNC(devices[i].devfn)); >> + } >> + } >> + } >> + ret = -EPERM; >> + goto out_single; >> + } >> + >> + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); >> + >> + for (i = 0; i < info->count; i++) { >> + VFIOPCIDevice *tmp; >> + VFIODevice *vbasedev_iter; >> + >> + trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, >> + devices[i].bus, >> + >> PCI_SLOT(devices[i].devfn), >> + >> PCI_FUNC(devices[i].devfn), >> + devices[i].devid); >> + >> + /* >> + * If a VFIO cdev device is resettable, all the dependent devices >> + * are either bound to same iommufd or within same iommu_groups as >> + * one of the iommufd bound devices. >> + */ >> + assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); >> + >> + if (devices[i].devid == vdev->vbasedev.devid || >> + devices[i].devid == VFIO_PCI_DEVID_OWNED) { >> + continue; >> + } >> + >> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid); >> + if (!vbasedev_iter || !vbasedev_iter->dev->realized || >> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { >> + continue; >> + } >> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); >> + if (single) { >> + ret = -EINVAL; >> + goto out_single; >> + } >> + vfio_pci_pre_reset(tmp); >> + tmp->vbasedev.needs_reset = false; >> + multi = true; >> + } >> + >> + if (!single && !multi) { >> + ret = -EINVAL; >> + goto out_single; >> + } >> + >> + /* Use zero length array for hot reset with iommufd backend */ >> + reset = g_malloc0(sizeof(*reset)); >> + reset->argsz = sizeof(*reset); >> + >> + /* Bus reset! */ >> + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); >> + g_free(reset); >> + if (ret) { >> + ret = -errno; >> + } >> + >> + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, >> + ret ? strerror(errno) : "Success"); >> + >> + /* Re-enable INTx on affected devices */ >> + for (i = 0; i < info->count; i++) { >> + VFIOPCIDevice *tmp; >> + VFIODevice *vbasedev_iter; >> + >> + if (devices[i].devid == vdev->vbasedev.devid || >> + devices[i].devid == VFIO_PCI_DEVID_OWNED) { >> + continue; >> + } >> + >> + vbasedev_iter = iommufd_cdev_pci_find_by_devid(devices[i].devid); >> + if (!vbasedev_iter || !vbasedev_iter->dev->realized || >> + vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { >> + continue; >> + } >> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); >nit: I see this block of code also is used above for the pre_reset. May >be interesting to introduce an helper? Could be done later though Will do in v7. Thanks Zhenzhong >> + vfio_pci_post_reset(tmp); >> + } >> +out_single: >> + if (!single) { >> + vfio_pci_post_reset(vdev); >> + } >> + g_free(info); >> + >> + return ret; >> +} >> + >> const VFIOIOMMUOps vfio_iommufd_ops = { >> .dma_map = iommufd_cdev_map, >> .dma_unmap = iommufd_cdev_unmap, >> .attach_device = iommufd_cdev_attach, >> .detach_device = iommufd_cdev_detach, >> + .pci_hot_reset = iommufd_cdev_pci_hot_reset, >> }; >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 5d3e9e8cee..d838232d5a 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const >char *name, const char *str, in >> iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" >> iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new >IOMMUFD container with ioasid=%d" >> iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int >num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" >> +iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int >function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" >Otherwise looks good to me. > >Reviewed-by: Eric Auger <eric.au...@redhat.com> > > >Eric > >