On Tue, 19 Jul 2016 15:32:43 +0800 Zhou Jie <zhoujie2...@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.f...@cn.fujitsu.com> > > During aer err occurs and resume do following to > protect device from being accessed. > 1. Make config space read only. > 2. Disable INTx/MSI Interrupt. > 3. Do nothing for bar regions. > > Signed-off-by: Zhou Jie <zhoujie2...@cn.fujitsu.com> > --- > drivers/vfio/pci/vfio_pci.c | 30 ++++++++++++++++++++++++++++++ > drivers/vfio/pci/vfio_pci_private.h | 2 ++ > include/uapi/linux/vfio.h | 2 ++ > 3 files changed, 34 insertions(+) > > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > index 2d12b03..dd96b60 100644 > --- a/drivers/vfio/pci/vfio_pci.c > +++ b/drivers/vfio/pci/vfio_pci.c > @@ -318,6 +318,7 @@ static int vfio_pci_open(void *device_data) > return -ENODEV; > > mutex_lock(&driver_lock); > + init_completion(&vdev->aer_error_completion); > > if (!vdev->refcnt) { > ret = vfio_pci_enable(vdev); > @@ -571,6 +572,16 @@ static long vfio_pci_ioctl(void *device_data, > struct vfio_pci_device *vdev = device_data; > unsigned long minsz; > > + if (vdev->aer_error_in_progress && (cmd == VFIO_DEVICE_SET_IRQS || > + cmd == VFIO_DEVICE_RESET || cmd == VFIO_DEVICE_PCI_HOT_RESET)) { > + int ret; > + ret = wait_for_completion_interruptible( > + &vdev->aer_error_completion); > + if (ret) { > + return ret; > + } No brackets necessary. > + } > + > if (cmd == VFIO_DEVICE_GET_INFO) { > struct vfio_device_info info; > > @@ -587,6 +598,10 @@ static long vfio_pci_ioctl(void *device_data, > if (vdev->reset_works) > info.flags |= VFIO_DEVICE_FLAGS_RESET; > > + info.flags |= VFIO_DEVICE_FLAGS_AERPROCESS; > + if (vdev->aer_error_in_progress) > + info.flags |= VFIO_DEVICE_FLAGS_INAERPROCESS; > + > info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; > info.num_irqs = VFIO_PCI_NUM_IRQS; > > @@ -996,6 +1011,14 @@ static ssize_t vfio_pci_rw(void *device_data, char > __user *buf, > > switch (index) { > case VFIO_PCI_CONFIG_REGION_INDEX: > + if (vdev->aer_error_in_progress && iswrite) { > + int ret; > + ret = wait_for_completion_interruptible( > + &vdev->aer_error_completion); > + if (ret) { > + return ret; > + } > + } > return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite); > > case VFIO_PCI_ROM_REGION_INDEX: > @@ -1226,6 +1249,10 @@ static pci_ers_result_t > vfio_pci_aer_err_detected(struct pci_dev *pdev, > > mutex_lock(&vdev->igate); > > + vdev->aer_error_in_progress = true; > + vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | > + VFIO_IRQ_SET_ACTION_TRIGGER, > + vdev->irq_type, 0, 0, NULL); > if (vdev->err_trigger) > eventfd_signal(vdev->err_trigger, 1); > > @@ -1252,6 +1279,9 @@ static void vfio_pci_aer_resume(struct pci_dev *pdev) > } > > mutex_lock(&vdev->igate); > + > + vdev->aer_error_in_progress = false; > + complete_all(&vdev->aer_error_completion); > if (vdev->resume_trigger) > eventfd_signal(vdev->resume_trigger, 1); > > diff --git a/drivers/vfio/pci/vfio_pci_private.h > b/drivers/vfio/pci/vfio_pci_private.h > index 80d4ddd..2f151f5 100644 > --- a/drivers/vfio/pci/vfio_pci_private.h > +++ b/drivers/vfio/pci/vfio_pci_private.h > @@ -84,6 +84,8 @@ struct vfio_pci_device { > bool has_vga; > bool needs_reset; > bool nointx; > + bool aer_error_in_progress; > + struct completion aer_error_completion; > struct pci_saved_state *pci_saved_state; > int refcnt; > struct eventfd_ctx *err_trigger; > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index 34ab138..276ce50 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -198,6 +198,8 @@ struct vfio_device_info { > #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ > #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ > #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ > +#define VFIO_DEVICE_FLAGS_AERPROCESS (1 << 4) /* support aer error > progress */ > +#define VFIO_DEVICE_FLAGS_INAERPROCESS (1 << 5)/* status in aer error > progress */ > __u32 num_regions; /* Max region index + 1 */ > __u32 num_irqs; /* Max IRQ index + 1 */ > }; Clearly this has only been tested for a single instance of an AER error event and resume per device. Are the things you're intending to block actually blocked for subsequent events? Note how complete_all() fills the done field to let all current and future waiters go through and nowhere is there a call to reinit_completion() to drain that path. Thanks, Alex