> -----Original Message-----
> From: Thomas Monjalon [mailto:tho...@monjalon.net]
> Sent: Saturday, March 2, 2019 10:43 AM
> To: dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zh...@intel.com>; sta...@dpdk.org
> Subject: [PATCH 3/3] eal: fix multi-process probe failure handling
> 
> If probe fails in multi-process context, the device must be removed in other
> processes for consistency. This is a rollback mechanism.
> However, the rollback should not happen for devices which were already probed
> before the current probe transaction.
> 
> When probing an already probed device, the driver may either reject it with
> -EEXIST, or update it and succeed with code 0.
> In order to distinguish a successful new probe from a re-probe, in the function
> local_dev_probe(), the positive EEXIST code is returned for the latter case.
> 
> The functions rte_dev_probe() and __handle_secondary_request() can test for
> -EEXIST and +EEXIST, and skip rollback in such case.
> 
> Fixes: 244d5130719c ("eal: enable hotplug on multi-process")
> Fixes: ac9e4a17370f ("eal: support attach/detach shared device from 
> secondary")
> Cc: qi.z.zh...@intel.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Thomas Monjalon <tho...@monjalon.net>
> ---
>  lib/librte_eal/common/eal_common_dev.c | 12 ++++++++++--
>  lib/librte_eal/common/eal_private.h    |  2 +-
>  lib/librte_eal/common/hotplug_mp.c     |  8 ++++++--
>  3 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_dev.c
> b/lib/librte_eal/common/eal_common_dev.c
> index deaaea9345..2c7b1ab071 100644
> --- a/lib/librte_eal/common/eal_common_dev.c
> +++ b/lib/librte_eal/common/eal_common_dev.c
> @@ -132,6 +132,7 @@ local_dev_probe(const char *devargs, struct rte_device
> **new_dev)  {
>       struct rte_device *dev;
>       struct rte_devargs *da;
> +     bool already_probed;
>       int ret;
> 
>       *new_dev = NULL;
> @@ -171,12 +172,15 @@ local_dev_probe(const char *devargs, struct rte_device
> **new_dev)
>        * those devargs shouldn't be removed manually anymore.
>        */
> 
> +     already_probed = rte_dev_is_probed(dev);
>       ret = dev->bus->plug(dev);
>       if (ret && !rte_dev_is_probed(dev)) { /* if hasn't ever succeeded */
>               RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
>                       dev->name);
>               return ret;
>       }
> +     if (ret == 0 && already_probed)
> +             ret = EEXIST; /* hint to avoid any rollback */

What if bus->plug returns -EEXIST and rte_dev_is_probed returns true? (See 
rte_pci_probe_one_driver.)
You will not give a hint here in that case, but is this expected?

> 
>       *new_dev = dev;
>       return ret;
> @@ -194,6 +198,7 @@ rte_dev_probe(const char *devargs)  {
>       struct eal_dev_mp_req req;
>       struct rte_device *dev;
> +     bool already_probed;
>       int ret;
> 
>       memset(&req, 0, sizeof(req));
> @@ -221,8 +226,8 @@ rte_dev_probe(const char *devargs)
> 
>       /* primary attach the new device itself. */
>       ret = local_dev_probe(devargs, &dev);
> -
> -     if (ret != 0 && ret != -EEXIST) {
> +     already_probed = (ret == -EEXIST || ret == EEXIST);
> +     if (ret < 0 && !already_probed) {
>               RTE_LOG(ERR, EAL,
>                       "Failed to attach device on primary process\n");
>               return ret;
> @@ -250,6 +255,9 @@ rte_dev_probe(const char *devargs)
>       return 0;
> 
>  rollback:
> +     if (already_probed)
> +             return ret; /* skip rollback */
> +
>       req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
> 
>       /* primary send rollback request to secondary. */ diff --git
> a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 798ede553b..a01d252930 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -304,7 +304,7 @@ rte_devargs_layers_parse(struct rte_devargs *devargs,
>   * @param new_dev
>   *   new device be probed as output.
>   * @return
> - *   0 on success, negative on error.
> + *   >=0 on success (+EEXIST if already probed), negative on error.
>   */
>  int local_dev_probe(const char *devargs, struct rte_device **new_dev);
> 
> diff --git a/lib/librte_eal/common/hotplug_mp.c
> b/lib/librte_eal/common/hotplug_mp.c
> index 69e9a16d6a..9f8ef28a3b 100644
> --- a/lib/librte_eal/common/hotplug_mp.c
> +++ b/lib/librte_eal/common/hotplug_mp.c
> @@ -90,13 +90,15 @@ __handle_secondary_request(void *param)
>       struct rte_devargs da;
>       struct rte_device *dev;
>       struct rte_bus *bus;
> +     bool already_probed = false;
>       int ret = 0;
> 
>       tmp_req = *req;
> 
>       if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
>               ret = local_dev_probe(req->devargs, &dev);
> -             if (ret != 0 && ret != -EEXIST) {
> +             already_probed = (ret == -EEXIST || ret == EEXIST);
> +             if (ret < 0 && !already_probed) {
>                       RTE_LOG(ERR, EAL, "Failed to hotplug add device on 
> primary\n");
>                       goto finish;
>               }
> @@ -159,7 +161,7 @@ __handle_secondary_request(void *param)
>       goto finish;
> 
>  rollback:
> -     if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
> +     if (req->t == EAL_DEV_REQ_TYPE_ATTACH && !already_probed) {
>               tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
>               eal_dev_hotplug_request_to_secondary(&tmp_req);
>               local_dev_remove(dev);
> @@ -238,6 +240,8 @@ static void __handle_primary_request(void *param)
>       case EAL_DEV_REQ_TYPE_ATTACH:
>       case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK:
>               ret = local_dev_probe(req->devargs, &dev);
> +             if (ret > 0)
> +                     ret = 0; /* return only errors */
>               break;
>       case EAL_DEV_REQ_TYPE_DETACH:
>       case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
> --
> 2.20.1

Reply via email to