Query the link status can end up in an inconsist state where the port is down but it is reporting speed. For that another query is scheduled for a later time. A race condition is possible between the scheduled call and other link status interrupt handlers. When the scheduled query by-pass those handlers, the link status will be stuck in an in-consist state. This patch addresses the race condition by not blocking link status queries in case delayed query is on the flight.
Fixes: 198a3c339a8f ("mlx5: handle link status interrupts") CC: sta...@dpdk.org Signed-off-by: Shahaf Shuler <shah...@mellanox.com> --- on v3: * allow only single alarm on the flight. on v2: * change the commit log to better explain the race * instead of blocking other interrupt handlers allow everyone to query the link status --- drivers/net/mlx5/mlx5_ethdev.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index e77238f..fcbe273 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1128,7 +1128,7 @@ struct priv * priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) { struct ibv_async_event event; - int port_change = 0; + struct rte_eth_link *link = &dev->data->dev_link; int ret = 0; /* Read all message and acknowledge them. */ @@ -1136,29 +1136,24 @@ struct priv * if (ibv_get_async_event(priv->ctx, &event)) break; - if (event.event_type == IBV_EVENT_PORT_ACTIVE || - event.event_type == IBV_EVENT_PORT_ERR) - port_change = 1; - else + if (event.event_type != IBV_EVENT_PORT_ACTIVE && + event.event_type != IBV_EVENT_PORT_ERR) DEBUG("event type %d on port %d not handled", event.event_type, event.element.port_num); ibv_ack_async_event(&event); } - - if (port_change ^ priv->pending_alarm) { - struct rte_eth_link *link = &dev->data->dev_link; - - priv->pending_alarm = 0; - mlx5_link_update(dev, 0); - if (((link->link_speed == 0) && link->link_status) || - ((link->link_speed != 0) && !link->link_status)) { + mlx5_link_update(dev, 0); + if (((link->link_speed == 0) && link->link_status) || + ((link->link_speed != 0) && !link->link_status)) { + if (!priv->pending_alarm) { /* Inconsistent status, check again later. */ priv->pending_alarm = 1; rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, mlx5_dev_link_status_handler, dev); - } else - ret = 1; + } + } else { + ret = 1; } return ret; } @@ -1178,6 +1173,7 @@ struct priv * priv_lock(priv); assert(priv->pending_alarm == 1); + priv->pending_alarm = 0; ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); if (ret) -- 1.8.3.1