Hi, Shjith Could you help to review and verify if this fix can meet your requirement?
Thanks Jingjing > -----Original Message----- > From: Wu, Jingjing > Sent: Tuesday, October 10, 2017 6:09 AM > To: Yigit, Ferruh <ferruh.yi...@intel.com>; Tan, Jianfeng > <jianfeng....@intel.com>; shijith.thot...@caviumnetworks.com; > greg...@weka.io; Xing, Beilei <beilei.x...@intel.com> > Cc: dev@dpdk.org; Wu, Jingjing <jingjing...@intel.com>; sta...@dpdk.org > Subject: [PATCH v2 2/2] igb_uio: fix interrupt enablement after FLR in VM > > If pass-through a VF by vfio-pci to a Qemu VM, after FLR in VM, the interrupt > setting is not recoverd correctly to host as below: > in VM guest: > Capabilities: [70] MSI-X: Enable+ Count=5 Masked- in Host: > Capabilities: [70] MSI-X: Enable+ Count=5 Masked- > > That was because in pci_reset_function, it first reads the PCI configure and > set > FLR reset, and then writes PCI configure as restoration. But not all the > writing > are successful to Host. > Because vfio-pci driver doesn't allow directly write PCI MSI-X Cap. > > To fix this issue, we need to move the interrupt enablement from igb_uio probe > to open device file. While it is also the similar as the behaviour in vfio_pci > kernel module code. > > Fixes: b58eedfc7dd5 ("igb_uio: issue FLR during open and release of device > file") > > Cc: sta...@dpdk.org > > Signed-off-by: Jingjing Wu <jingjing...@intel.com> > Signed-off-by: Jianfeng Tan <jianfeng....@intel.com> > --- > v2 change: > - fix typo > - remove duplicated debug info > > lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 221 > ++++++++++++++++-------------- > 1 file changed, 119 insertions(+), 102 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > index c8dd5f4..d943795 100644 > --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > @@ -49,7 +49,6 @@ struct rte_uio_pci_dev { > > static char *intr_mode; > static enum rte_intr_mode igbuio_intr_mode_preferred = > RTE_INTR_MODE_MSIX; > - > /* sriov sysfs */ > static ssize_t > show_max_vfs(struct device *dev, struct device_attribute *attr, @@ -144,8 > +143,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) > * If yes, disable it here and will be enable later. > */ > static irqreturn_t > -igbuio_pci_irqhandler(int irq, struct uio_info *info) > +igbuio_pci_irqhandler(int irq, void *dev_id) > { > + struct uio_device *idev = (struct uio_device *)dev_id; > + struct uio_info *info = idev->info; > struct rte_uio_pci_dev *udev = info->priv; > > /* Legacy mode need to mask in hardware */ @@ -153,10 +154,115 > @@ igbuio_pci_irqhandler(int irq, struct uio_info *info) > !pci_check_and_mask_intx(udev->pdev)) > return IRQ_NONE; > > + uio_event_notify(info); > + > /* Message signal mode, no share IRQ and automasked */ > return IRQ_HANDLED; > } > > +static int > +igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) { > + int err = 0; > +#ifndef HAVE_ALLOC_IRQ_VECTORS > + struct msix_entry msix_entry; > +#endif > + > + switch (igbuio_intr_mode_preferred) { > + case RTE_INTR_MODE_MSIX: > + /* Only 1 msi-x vector needed */ > +#ifndef HAVE_ALLOC_IRQ_VECTORS > + msix_entry.entry = 0; > + if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { > + dev_dbg(&udev->pdev->dev, "using MSI-X"); > + udev->info.irq_flags = IRQF_NO_THREAD; > + udev->info.irq = msix_entry.vector; > + udev->mode = RTE_INTR_MODE_MSIX; > + break; > + } > +#else > + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) > { > + dev_dbg(&udev->pdev->dev, "using MSI-X"); > + udev->info.irq_flags = IRQF_NO_THREAD; > + udev->info.irq = pci_irq_vector(udev->pdev, 0); > + udev->mode = RTE_INTR_MODE_MSIX; > + break; > + } > +#endif > + > + /* fall back to MSI */ > + case RTE_INTR_MODE_MSI: > +#ifndef HAVE_ALLOC_IRQ_VECTORS > + if (pci_enable_msi(udev->pdev) == 0) { > + dev_dbg(&udev->pdev->dev, "using MSI"); > + udev->info.irq_flags = IRQF_NO_THREAD; > + udev->info.irq = udev->pdev->irq; > + udev->mode = RTE_INTR_MODE_MSI; > + break; > + } > +#else > + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) > { > + dev_dbg(&udev->pdev->dev, "using MSI"); > + udev->info.irq_flags = IRQF_NO_THREAD; > + udev->info.irq = pci_irq_vector(udev->pdev, 0); > + udev->mode = RTE_INTR_MODE_MSI; > + break; > + } > +#endif > + /* fall back to INTX */ > + case RTE_INTR_MODE_LEGACY: > + if (pci_intx_mask_supported(udev->pdev)) { > + dev_dbg(&udev->pdev->dev, "using INTX"); > + udev->info.irq_flags = IRQF_SHARED | > IRQF_NO_THREAD; > + udev->info.irq = udev->pdev->irq; > + udev->mode = RTE_INTR_MODE_LEGACY; > + break; > + } > + dev_notice(&udev->pdev->dev, "PCI INTX mask not > supported\n"); > + /* fall back to no IRQ */ > + case RTE_INTR_MODE_NONE: > + udev->mode = RTE_INTR_MODE_NONE; > + udev->info.irq = UIO_IRQ_NONE; > + break; > + > + default: > + dev_err(&udev->pdev->dev, "invalid IRQ mode %u", > + igbuio_intr_mode_preferred); > + udev->info.irq = UIO_IRQ_NONE; > + err = -EINVAL; > + } > + > + if (udev->info.irq != UIO_IRQ_NONE) > + err = request_irq(udev->info.irq, igbuio_pci_irqhandler, > + udev->info.irq_flags, udev->info.name, > + udev->info.uio_dev); > + dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n", > + udev->info.irq); > + > + return err; > +} > + > +static void > +igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) { > + if (udev->info.irq) { > + free_irq(udev->info.irq, udev->info.uio_dev); > + udev->info.irq = 0; > + } > + > +#ifndef HAVE_ALLOC_IRQ_VECTORS > + if (udev->mode == RTE_INTR_MODE_MSIX) > + pci_disable_msix(udev->pdev); > + if (udev->mode == RTE_INTR_MODE_MSI) > + pci_disable_msi(udev->pdev); > +#else > + if (udev->mode == RTE_INTR_MODE_MSIX || > + udev->mode == RTE_INTR_MODE_MSI) > + pci_free_irq_vectors(udev->pdev); > +#endif > +} > + > + > /** > * This gets called while opening uio device file. > */ > @@ -165,12 +271,19 @@ igbuio_pci_open(struct uio_info *info, struct inode > *inode) { > struct rte_uio_pci_dev *udev = info->priv; > struct pci_dev *dev = udev->pdev; > + int err; > > pci_reset_function(dev); > > /* set bus master, which was cleared by the reset function */ > pci_set_master(dev); > > + /* enable interrupts */ > + err = igbuio_pci_enable_interrupts(udev); > + if (err) { > + dev_err(&dev->dev, "Enable interrupt fails\n"); > + return err; > + } > return 0; > } > > @@ -180,6 +293,9 @@ igbuio_pci_release(struct uio_info *info, struct inode > *inode) > struct rte_uio_pci_dev *udev = info->priv; > struct pci_dev *dev = udev->pdev; > > + /* disable interrupts */ > + igbuio_pci_disable_interrupts(udev); > + > /* stop the device from further DMA */ > pci_clear_master(dev); > > @@ -250,94 +366,6 @@ igbuio_pci_release_iomem(struct uio_info *info) } > > static int > -igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) -{ > - int err = 0; > -#ifndef HAVE_ALLOC_IRQ_VECTORS > - struct msix_entry msix_entry; > -#endif > - > - switch (igbuio_intr_mode_preferred) { > - case RTE_INTR_MODE_MSIX: > - /* Only 1 msi-x vector needed */ > -#ifndef HAVE_ALLOC_IRQ_VECTORS > - msix_entry.entry = 0; > - if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { > - dev_dbg(&udev->pdev->dev, "using MSI-X"); > - udev->info.irq_flags = IRQF_NO_THREAD; > - udev->info.irq = msix_entry.vector; > - udev->mode = RTE_INTR_MODE_MSIX; > - break; > - } > -#else > - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) > { > - dev_dbg(&udev->pdev->dev, "using MSI-X"); > - udev->info.irq_flags = IRQF_NO_THREAD; > - udev->info.irq = pci_irq_vector(udev->pdev, 0); > - udev->mode = RTE_INTR_MODE_MSIX; > - break; > - } > -#endif > - /* fall back to MSI */ > - case RTE_INTR_MODE_MSI: > -#ifndef HAVE_ALLOC_IRQ_VECTORS > - if (pci_enable_msi(udev->pdev) == 0) { > - dev_dbg(&udev->pdev->dev, "using MSI"); > - udev->info.irq_flags = IRQF_NO_THREAD; > - udev->info.irq = udev->pdev->irq; > - udev->mode = RTE_INTR_MODE_MSI; > - break; > - } > -#else > - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) > { > - dev_dbg(&udev->pdev->dev, "using MSI"); > - udev->info.irq_flags = IRQF_NO_THREAD; > - udev->info.irq = pci_irq_vector(udev->pdev, 0); > - udev->mode = RTE_INTR_MODE_MSI; > - break; > - } > -#endif > - /* fall back to INTX */ > - case RTE_INTR_MODE_LEGACY: > - if (pci_intx_mask_supported(udev->pdev)) { > - dev_dbg(&udev->pdev->dev, "using INTX"); > - udev->info.irq_flags = IRQF_SHARED | > IRQF_NO_THREAD; > - udev->info.irq = udev->pdev->irq; > - udev->mode = RTE_INTR_MODE_LEGACY; > - break; > - } > - dev_notice(&udev->pdev->dev, "PCI INTX mask not > supported\n"); > - /* fall back to no IRQ */ > - case RTE_INTR_MODE_NONE: > - udev->mode = RTE_INTR_MODE_NONE; > - udev->info.irq = UIO_IRQ_NONE; > - break; > - > - default: > - dev_err(&udev->pdev->dev, "invalid IRQ mode %u", > - igbuio_intr_mode_preferred); > - err = -EINVAL; > - } > - > - return err; > -} > - > -static void > -igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) -{ -#ifndef > HAVE_ALLOC_IRQ_VECTORS > - if (udev->mode == RTE_INTR_MODE_MSIX) > - pci_disable_msix(udev->pdev); > - if (udev->mode == RTE_INTR_MODE_MSI) > - pci_disable_msi(udev->pdev); > -#else > - if (udev->mode == RTE_INTR_MODE_MSIX || > - udev->mode == RTE_INTR_MODE_MSI) > - pci_free_irq_vectors(udev->pdev); > -#endif > -} > - > -static int > igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) { > int i, iom, iop, ret; > @@ -427,20 +455,15 @@ igbuio_pci_probe(struct pci_dev *dev, const struct > pci_device_id *id) > /* fill uio infos */ > udev->info.name = "igb_uio"; > udev->info.version = "0.1"; > - udev->info.handler = igbuio_pci_irqhandler; > udev->info.irqcontrol = igbuio_pci_irqcontrol; > udev->info.open = igbuio_pci_open; > udev->info.release = igbuio_pci_release; > udev->info.priv = udev; > udev->pdev = dev; > > - err = igbuio_pci_enable_interrupts(udev); > - if (err != 0) > - goto fail_release_iomem; > - > err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); > if (err != 0) > - goto fail_disable_interrupts; > + goto fail_release_iomem; > > /* register uio driver */ > err = uio_register_device(&dev->dev, &udev->info); @@ -449,9 +472,6 > @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) > > pci_set_drvdata(dev, udev); > > - dev_info(&dev->dev, "uio device registered with irq %lx\n", > - udev->info.irq); > - > /* > * Doing a harmless dma mapping for attaching the device to > * the iommu identity mapping if kernel boots with iommu=pt. > @@ -477,8 +497,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct > pci_device_id *id) > > fail_remove_group: > sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); > -fail_disable_interrupts: > - igbuio_pci_disable_interrupts(udev); > fail_release_iomem: > igbuio_pci_release_iomem(&udev->info); > pci_disable_device(dev); > @@ -495,7 +513,6 @@ igbuio_pci_remove(struct pci_dev *dev) > > sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); > uio_unregister_device(&udev->info); > - igbuio_pci_disable_interrupts(udev); > igbuio_pci_release_iomem(&udev->info); > pci_disable_device(dev); > pci_set_drvdata(dev, NULL); > -- > 2.7.4