Hi Jeff,

> As of device hot unplug, we need some preparatory measures so that we will
> not encounter memory fault after device be plug out of the system,
> and also let we could recover the running data path but not been break.
> This patch allows the buses to handle device hot unplug event.
> The patch only enable the ops in pci bus, when handle device hot unplug
> event, remap a dummy memory to avoid bus read/write error.
> Other buses could accordingly implement this ops specific by themselves.
> 
> Signed-off-by: Jeff Guo <jia....@intel.com>
> ---
> v20->19:
> clean the code
> ---
>  drivers/bus/pci/pci_common.c            | 67 
> +++++++++++++++++++++++++++++++++
>  drivers/bus/pci/pci_common_uio.c        | 32 ++++++++++++++++
>  drivers/bus/pci/private.h               | 12 ++++++
>  lib/librte_eal/common/include/rte_bus.h | 16 ++++++++
>  4 files changed, 127 insertions(+)
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index 2a00f36..709eaf3 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -474,6 +474,72 @@ pci_find_device(const struct rte_device *start, 
> rte_dev_cmp_t cmp,
>  }
> 
>  static int
> +pci_handle_hot_unplug(struct rte_device *dev, void *failure_addr)
> +{
> +     struct rte_pci_device *pdev = NULL;
> +     int ret = 0, i, isfound = 0;
> +
> +     if (failure_addr != NULL) {
> +             FOREACH_DEVICE_ON_PCIBUS(pdev) {
> +                     for (i = 0; i != sizeof(pdev->mem_resource) /
> +                             sizeof(pdev->mem_resource[0]); i++) {

You can do i != RTE_DIM(pdev->mem_resource) here.

> +                             if ((uint64_t)failure_addr >=
> +                                 (uint64_t)pdev->mem_resource[i].addr &&
> +                                 (uint64_t)failure_addr <=
> +                                 (uint64_t)pdev->mem_resource[i].addr +
> +                                 pdev->mem_resource[i].len) {


I think this should be failure_addr < addr + len, since addr + len is already one byte past the end of the resource.

> +                                     RTE_LOG(ERR, EAL, "Failure address "
> +                                             "%16.16"PRIx64" is belong to "
> +                                             "resource of device %s!\n",
> +                                             (uint64_t)failure_addr,
> +                                             pdev->device.name);
> +                                     isfound = 1;
> +                                     break;
> +                             }
> +                     }
> +                     if (isfound)
> +                             break;


It might be a good idea to move the code that searches for the address into a
separate function.

> +             }
> +     } else if (dev != NULL) {
> +             pdev = RTE_DEV_TO_PCI(dev);
> +     } else {
> +             return -EINVAL;
> +     }
> +
> +     if (!pdev)
> +             return -1;
> +
> +     /* remap resources for devices */
> +     switch (pdev->kdrv) {
> +     case RTE_KDRV_VFIO:
> +#ifdef VFIO_PRESENT
> +             /* TODO */
> +#endif

This should set ret = -1, since the VFIO case is not implemented yet.

> +             break;
> +     case RTE_KDRV_IGB_UIO:
> +     case RTE_KDRV_UIO_GENERIC:
> +             if (rte_eal_using_phys_addrs()) {
> +                     /* map resources for devices that use uio */
> +                     ret = pci_uio_remap_resource(pdev);
> +             }
> +             break;
> +     case RTE_KDRV_NIC_UIO:
> +             ret = pci_uio_remap_resource(pdev);
> +             break;
> +     default:
> +             RTE_LOG(DEBUG, EAL,
> +                     "  Not managed by a supported kernel driver, 
> skipped\n");
> +             ret = -1;
> +             break;
> +     }
> +
> +     if (ret != 0)
> +             RTE_LOG(ERR, EAL, "failed to handle hot unplug of %s",
> +                     pdev->name);
> +     return ret;
> +}
> +
> +static int
>  pci_plug(struct rte_device *dev)
>  {
>       return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
> @@ -503,6 +569,7 @@ struct rte_pci_bus rte_pci_bus = {
>               .unplug = pci_unplug,
>               .parse = pci_parse,
>               .get_iommu_class = rte_pci_get_iommu_class,
> +             .handle_hot_unplug = pci_handle_hot_unplug,
>       },
>       .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
>       .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
> diff --git a/drivers/bus/pci/pci_common_uio.c 
> b/drivers/bus/pci/pci_common_uio.c
> index 54bc20b..ba2c458 100644
> --- a/drivers/bus/pci/pci_common_uio.c
> +++ b/drivers/bus/pci/pci_common_uio.c
> @@ -146,6 +146,38 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
>       }
>  }
> 
> +/* remap the PCI resource of a PCI device in anonymous virtual memory */
> +int
> +pci_uio_remap_resource(struct rte_pci_device *dev)
> +{
> +     int i;
> +     void *map_address;
> +
> +     if (dev == NULL)
> +             return -1;
> +
> +     /* Remap all BARs */
> +     for (i = 0; i != PCI_MAX_RESOURCE; i++) {
> +             /* skip empty BAR */
> +             if (dev->mem_resource[i].phys_addr == 0)
> +                     continue;
> +             pci_unmap_resource(dev->mem_resource[i].addr,
> +                             (size_t)dev->mem_resource[i].len);
> +             map_address = pci_map_resource(
> +                             dev->mem_resource[i].addr, -1, 0,
> +                             (size_t)dev->mem_resource[i].len,
> +                             MAP_ANONYMOUS | MAP_FIXED);

Instead of using munmap()/mmap(), can we use mremap() here?
It might be a somewhat safer approach.

> +             if (map_address == MAP_FAILED) {
> +                     RTE_LOG(ERR, EAL,
> +                             "Cannot remap resource for device %s\n",
> +                             dev->name);
> +                     return -1;
> +             }
> +     }
> +
> +     return 0;
> +}
> +
>  static struct mapped_pci_resource *
>  pci_uio_find_resource(struct rte_pci_device *dev)
>  {
> diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
> index 88fa587..cc1668c 100644
> --- a/drivers/bus/pci/private.h
> +++ b/drivers/bus/pci/private.h
> @@ -173,6 +173,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev,
>               struct mapped_pci_resource *uio_res);
> 
>  /**
> + * remap the pci uio resource.
> + *
> + * @param dev
> + *   Point to the struct rte pci device.
> + * @return
> + *   - On success, zero.
> + *   - On failure, a negative value.
> + */
> +int
> +pci_uio_remap_resource(struct rte_pci_device *dev);
> +
> +/**
>   * Map device memory to uio resource
>   *
>   * This function is private to EAL.
> diff --git a/lib/librte_eal/common/include/rte_bus.h 
> b/lib/librte_eal/common/include/rte_bus.h
> index 6fb0834..d2c5778 100644
> --- a/lib/librte_eal/common/include/rte_bus.h
> +++ b/lib/librte_eal/common/include/rte_bus.h
> @@ -168,6 +168,20 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
>  typedef int (*rte_bus_parse_t)(const char *name, void *addr);
> 
>  /**
> + * Implementation specific hot unplug handler function which is responsible
> + * for handle the failure when hot unplug the device, guaranty the system
> + * would not crash in the case.
> + * @param dev
> + *   Pointer of the device structure.
> + *
> + * @return
> + *   0 on success.
> + *   !0 on error.
> + */
> +typedef int (*rte_bus_handle_hot_unplug_t)(struct rte_device *dev,
> +                                             void *dev_addr);
> +
> +/**
>   * Bus scan policies
>   */
>  enum rte_bus_scan_mode {
> @@ -209,6 +223,8 @@ struct rte_bus {
>       rte_bus_plug_t plug;         /**< Probe single device for drivers */
>       rte_bus_unplug_t unplug;     /**< Remove single device from driver */
>       rte_bus_parse_t parse;       /**< Parse a device name */
> +     rte_bus_handle_hot_unplug_t handle_hot_unplug; /**< handle hot unplug
> +                                                     device event */
>       struct rte_bus_conf conf;    /**< Bus configuration */
>       rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
>  };
> --
> 2.7.4

Reply via email to