mlx5_link_update() returns immediately when it is called with the no-wait
parameter and its attempt to retrieve the link status fails with EAGAIN.
This is too harsh on busy systems, where the first call fails with EAGAIN
from time to time.
This patch adds a (very limited) number of retries in such cases so that
the link status can still be retrieved.

Signed-off-by: Moti Haimovsky <mo...@mellanox.com>
---
V2:
* Code rebase
---
 drivers/net/mlx5/mlx5_defs.h   | 3 +++
 drivers/net/mlx5/mlx5_ethdev.c | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 180122d..248ef3c 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -105,6 +105,9 @@
 /* Timeout in seconds to get a valid link status. */
 #define MLX5_LINK_STATUS_TIMEOUT 10
 
+/* Number of times to retry retrieving the physical link information. */
+#define MLX5_GET_LINK_STATUS_RETRY_COUNT 3
+
 /* Maximum number of UAR pages used by a port,
  * These are the size and mask for an array of mutexes used to synchronize
  * the access to port's UARs on platforms that do not support 64 bit writes.
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index f2b1752..5f05b2b 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -999,6 +999,7 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
        int ret;
        struct rte_eth_link dev_link;
        time_t start_time = time(NULL);
+       int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;
 
        do {
                ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
@@ -1007,7 +1008,7 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
                if (ret == 0)
                        break;
                /* Handle wait to complete situation. */
-               if (wait_to_complete && ret == -EAGAIN) {
+               if ((wait_to_complete || retry) && ret == -EAGAIN) {
                        if (abs((int)difftime(time(NULL), start_time)) <
                            MLX5_LINK_STATUS_TIMEOUT) {
                                usleep(0);
@@ -1019,7 +1020,7 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
                } else if (ret < 0) {
                        return ret;
                }
-       } while (wait_to_complete);
+       } while (wait_to_complete || retry-- > 0);
        ret = !!memcmp(&dev->data->dev_link, &dev_link,
                       sizeof(struct rte_eth_link));
        dev->data->dev_link = dev_link;
-- 
1.8.3.1

Reply via email to