This patch implements the sigbus handler op for the PCI bus. It finds the
PCI device whose BAR range contains the failure address, i.e. the device
that is being hot-unplugged, and calls the bus hot-unplug handler to handle
the hot-unplug failure of that device.

Signed-off-by: Jeff Guo <jia....@intel.com>
Acked-by: Shaopeng He <shaopeng...@intel.com>
---
v12->v11:
no change.
---
 drivers/bus/pci/pci_common.c | 53 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index d286234..f313fe9 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -405,6 +405,36 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
        return NULL;
 }
 
+/**
+ * Find the device that encountered the failure by iterating over all devices
+ * on the PCI bus and checking whether the failure address lies within one of
+ * its BARs. NULL-address BARs are skipped; offsets are compared to avoid wrap.
+ */
+static struct rte_pci_device *
+pci_find_device_by_addr(const void *failure_addr)
+{
+       const uint64_t addr = (uint64_t)(uintptr_t)failure_addr;
+       struct rte_pci_device *pdev = NULL;
+       uint64_t start;
+       unsigned int i;
+
+       FOREACH_DEVICE_ON_PCIBUS(pdev) {
+               for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
+                       if (pdev->mem_resource[i].addr == NULL)
+                               continue;
+                       start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr;
+                       if (addr < start ||
+                           addr - start >= pdev->mem_resource[i].len)
+                               continue;
+                       RTE_LOG(INFO, EAL, "Failure address %16.16"PRIx64
+                               " belongs to device %s!\n",
+                               addr, pdev->device.name);
+                       return pdev;
+               }
+       }
+       return NULL;
+}
+
 static int
 pci_hot_unplug_handler(struct rte_device *dev)
 {
@@ -433,6 +463,28 @@ pci_hot_unplug_handler(struct rte_device *dev)
 }
 
 static int
+pci_sigbus_handler(const void *failure_addr)
+{
+       /*
+        * Returns 1 when the address belongs to no PCI device, 0 when the
+        * hot-unplug handler succeeded for the owning device, -1 otherwise.
+        */
+       struct rte_pci_device *pdev = pci_find_device_by_addr(failure_addr);
+
+       /* It is a generic sigbus error, no bus would handle it. */
+       if (pdev == NULL)
+               return 1;
+
+       /* The sigbus error is caused by hot-unplug. */
+       if (pci_hot_unplug_handler(&pdev->device) != 0) {
+               RTE_LOG(ERR, EAL, "Failed to handle hot-unplug for "
+                       "device %s\n", pdev->name);
+               return -1;
+       }
+       return 0;
+}
+
+static int
 pci_plug(struct rte_device *dev)
 {
        return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
@@ -463,6 +515,7 @@ struct rte_pci_bus rte_pci_bus = {
                .parse = pci_parse,
                .get_iommu_class = rte_pci_get_iommu_class,
                .hot_unplug_handler = pci_hot_unplug_handler,
+               .sigbus_handler = pci_sigbus_handler,
        },
        .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
        .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
-- 
2.7.4

Reply via email to