4.14-stable review patch. If anyone has any objections, please let me know.
------------------ From: Dexuan Cui <[email protected]> commit 021ad274d7dc31611d4f47f7dd4ac7a224526f30 upstream. When we hot-remove the device, we first receive a PCI_EJECT message and then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0. The first message is offloaded to hv_eject_device_work(), and the second is offloaded to pci_devices_present_work(). Both the paths can be running list_del(&hpdev->list_entry), causing general protection fault, because system_wq can run them concurrently. The patch eliminates the race condition. Since access to present/eject work items is serialized, we do not need the hbus->enum_sem anymore, so remove it. Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") Link: https://lkml.kernel.org/r/kl1p15301mb00064da6b4d221123b5241cfbf...@kl1p15301mb0006.apcp153.prod.outlook.com Tested-by: Adrian Suhov <[email protected]> Tested-by: Chris Valean <[email protected]> Signed-off-by: Dexuan Cui <[email protected]> [[email protected]: squashed semaphore removal patch] Signed-off-by: Lorenzo Pieralisi <[email protected]> Reviewed-by: Michael Kelley <[email protected]> Acked-by: Haiyang Zhang <[email protected]> Cc: <[email protected]> # v4.6+ Cc: Vitaly Kuznetsov <[email protected]> Cc: Jack Morgenstein <[email protected]> Cc: Stephen Hemminger <[email protected]> Cc: K. Y. Srinivasan <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]> --- drivers/pci/host/pci-hyperv.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -457,7 +457,6 @@ struct hv_pcibus_device { spinlock_t device_list_lock; /* Protect lists below */ void __iomem *cfg_addr; - struct semaphore enum_sem; struct list_head resources_for_children; struct list_head children; @@ -471,6 +470,8 @@ struct hv_pcibus_device { struct retarget_msi_interrupt retarget_msi_interrupt_params; spinlock_t retarget_msi_interrupt_lock; + + struct workqueue_struct *wq; }; /* @@ -1604,12 +1605,8 @@ static struct hv_pci_dev *get_pcichild_w * It must also treat the omission of a previously observed device as * notification that the device no longer exists. * - * Note that this function is a work item, and it may not be - * invoked in the order that it was queued. Back to back - * updates of the list of present devices may involve queuing - * multiple work items, and this one may run before ones that - * were sent later. As such, this function only does something - * if is the last one in the queue. + * Note that this function is serialized with hv_eject_device_work(), + * because both are pushed to the ordered workqueue hbus->wq. */ static void pci_devices_present_work(struct work_struct *work) { @@ -1630,11 +1627,6 @@ static void pci_devices_present_work(str INIT_LIST_HEAD(&removed); - if (down_interruptible(&hbus->enum_sem)) { - put_hvpcibus(hbus); - return; - } - /* Pull this off the queue and process it if it was the last one. */ spin_lock_irqsave(&hbus->device_list_lock, flags); while (!list_empty(&hbus->dr_list)) { @@ -1651,7 +1643,6 @@ static void pci_devices_present_work(str spin_unlock_irqrestore(&hbus->device_list_lock, flags); if (!dr) { - up(&hbus->enum_sem); put_hvpcibus(hbus); return; } @@ -1738,7 +1729,6 @@ static void pci_devices_present_work(str break; } - up(&hbus->enum_sem); put_hvpcibus(hbus); kfree(dr); } @@ -1784,7 +1774,7 @@ static void hv_pci_devices_present(struc spin_unlock_irqrestore(&hbus->device_list_lock, flags); get_hvpcibus(hbus); - schedule_work(&dr_wrk->wrk); + queue_work(hbus->wq, &dr_wrk->wrk); } /** @@ -1862,7 +1852,7 @@ static void hv_pci_eject_device(struct h get_pcichild(hpdev, hv_pcidev_ref_pnp); INIT_WORK(&hpdev->wrk, hv_eject_device_work); get_hvpcibus(hpdev->hbus); - schedule_work(&hpdev->wrk); + queue_work(hpdev->hbus->wq, &hpdev->wrk); } /** @@ -2475,13 +2465,18 @@ static int hv_pci_probe(struct hv_device spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); - sema_init(&hbus->enum_sem, 1); init_completion(&hbus->remove_event); + hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, + hbus->sysdata.domain); + if (!hbus->wq) { + ret = -ENOMEM; + goto free_bus; + } ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, hv_pci_onchannelcallback, hbus); if (ret) - goto free_bus; + goto destroy_wq; hv_set_drvdata(hdev, hbus); @@ -2550,6 +2545,8 @@ free_config: hv_free_config_window(hbus); close: vmbus_close(hdev->channel); +destroy_wq: + destroy_workqueue(hbus->wq); free_bus: free_page((unsigned long)hbus); return ret; @@ -2629,6 +2626,7 @@ static int hv_pci_remove(struct hv_devic irq_domain_free_fwnode(hbus->sysdata.fwnode); put_hvpcibus(hbus); wait_for_completion(&hbus->remove_event); + destroy_workqueue(hbus->wq); free_page((unsigned long)hbus); return 0; }

