Querying the link status can end up in an inconsistent state where the port is down but it is still reporting a speed. To handle that, another query is scheduled for a later time. A race condition is possible between the scheduled call and the next link status interrupt from the device. In some scenarios it will cause the link to always stay in an inconsistent state. This patch addresses the race condition: once a delayed query is scheduled, no agent other than the delayed query itself can query the link status.
Fixes: 198a3c339a8f ("mlx5: handle link status interrupts") CC: sta...@dpdk.org Signed-off-by: Shahaf Shuler <shah...@mellanox.com> --- drivers/net/mlx5/mlx5_ethdev.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index e77238f..cc61244 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1125,9 +1125,10 @@ struct priv * * Nonzero if the callback process can be called immediately. */ static int -priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) +priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev, int is_alarm) { struct ibv_async_event event; + struct rte_eth_link *link = &dev->data->dev_link; int port_change = 0; int ret = 0; @@ -1144,22 +1145,20 @@ struct priv * event.event_type, event.element.port_num); ibv_ack_async_event(&event); } - - if (port_change ^ priv->pending_alarm) { - struct rte_eth_link *link = &dev->data->dev_link; - - priv->pending_alarm = 0; - mlx5_link_update(dev, 0); - if (((link->link_speed == 0) && link->link_status) || - ((link->link_speed != 0) && !link->link_status)) { - /* Inconsistent status, check again later. */ - priv->pending_alarm = 1; - rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, - mlx5_dev_link_status_handler, - dev); - } else - ret = 1; - } + if ((priv->pending_alarm && !is_alarm) || + (!priv->pending_alarm && !port_change)) + return ret; + priv->pending_alarm = 0; + mlx5_link_update(dev, 0); + if (((link->link_speed == 0) && link->link_status) || + ((link->link_speed != 0) && !link->link_status)) { + /* Inconsistent status, check again later. 
*/ + priv->pending_alarm = 1; + rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, + mlx5_dev_link_status_handler, + dev); + } else + ret = 1; return ret; } @@ -1178,7 +1177,7 @@ struct priv * priv_lock(priv); assert(priv->pending_alarm == 1); - ret = priv_dev_link_status_handler(priv, dev); + ret = priv_dev_link_status_handler(priv, dev, 1); priv_unlock(priv); if (ret) _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); @@ -1201,7 +1200,7 @@ struct priv * (void)intr_handle; priv_lock(priv); - ret = priv_dev_link_status_handler(priv, dev); + ret = priv_dev_link_status_handler(priv, dev, 0); priv_unlock(priv); if (ret) _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); -- 1.8.3.1