On Tue, Jan 31, 2017 at 03:13:38PM +0200, Shahaf Shuler wrote:
> Query the link status can end up in an inconsist state where the port is
> down but it is reporting speed. For that another query is scheduled for a
> later time.
> A race condition is possible between the scheduled call and other link
> status interrupt handlers. When the scheduled query by-pass those handlers,
> the link status will be stuck in an in-consist state.
> This patch addresses the race condition by not blocking link status
> queries in case delayed query is on the flight.
> 
> Fixes: 198a3c339a8f ("mlx5: handle link status interrupts")
> CC: sta...@dpdk.org
> 
> Signed-off-by: Shahaf Shuler <shah...@mellanox.com>
> ---
> on v3:
> * allow only single alarm on the flight.
> 
> on v2:
> * change the commit log to better explain the race
> * instead of blocking other interrupt handlers allow everyone to query the 
> link status
> ---
>  drivers/net/mlx5/mlx5_ethdev.c | 26 +++++++++++---------------
>  1 file changed, 11 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index e77238f..fcbe273 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1128,7 +1128,7 @@ struct priv *
>  priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
>  {
>       struct ibv_async_event event;
> -     int port_change = 0;
> +     struct rte_eth_link *link = &dev->data->dev_link;
>       int ret = 0;
>  
>       /* Read all message and acknowledge them. */
> @@ -1136,29 +1136,24 @@ struct priv *
>               if (ibv_get_async_event(priv->ctx, &event))
>                       break;
>  
> -             if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
> -                 event.event_type == IBV_EVENT_PORT_ERR)
> -                     port_change = 1;
> -             else
> +             if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
> +                 event.event_type != IBV_EVENT_PORT_ERR)
>                       DEBUG("event type %d on port %d not handled",
>                             event.event_type, event.element.port_num);
>               ibv_ack_async_event(&event);
>       }
> -
> -     if (port_change ^ priv->pending_alarm) {
> -             struct rte_eth_link *link = &dev->data->dev_link;
> -
> -             priv->pending_alarm = 0;
> -             mlx5_link_update(dev, 0);
> -             if (((link->link_speed == 0) && link->link_status) ||
> -                 ((link->link_speed != 0) && !link->link_status)) {
> +     mlx5_link_update(dev, 0);
> +     if (((link->link_speed == 0) && link->link_status) ||
> +         ((link->link_speed != 0) && !link->link_status)) {
> +             if (!priv->pending_alarm) {
>                       /* Inconsistent status, check again later. */
>                       priv->pending_alarm = 1;
>                       rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
>                                         mlx5_dev_link_status_handler,
>                                         dev);
> -             } else
> -                     ret = 1;
> +             }
> +     } else {
> +             ret = 1;
>       }
>       return ret;
>  }
> @@ -1178,6 +1173,7 @@ struct priv *
>  
>       priv_lock(priv);
>       assert(priv->pending_alarm == 1);
> +     priv->pending_alarm = 0;
>       ret = priv_dev_link_status_handler(priv, dev);
>       priv_unlock(priv);
>       if (ret)
> -- 
> 1.8.3.1

Acked-by: Nelio Laranjeiro <nelio.laranje...@6wind.com>

-- 
Nélio Laranjeiro
6WIND

Reply via email to