From: Feras Daoud <fera...@mellanox.com>

Since the FW can be shared between different PFs/VFs, it is common
for more than one health poll to detect a failure. This can lead to
multiple resets, which are unneeded.

The solution is to use a FW locking mechanism, based on the semaphore
space, that allows only one device to collect the cr-dump and to issue
a sw-reset.

Signed-off-by: Feras Daoud <fera...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Reviewed-by: Alex Vesker <va...@mellanox.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 40 ++++++++++++++++---
 .../ethernet/mellanox/mlx5/core/lib/pci_vsc.h |  8 ++++
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  4 ++
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
index f42890bdd6b1..b6b8fb13f621 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
@@ -24,11 +24,6 @@
        pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val))
 #define VSC_MAX_RETRIES 2048
 
-enum mlx5_vsc_state {
-       MLX5_VSC_UNLOCK,
-       MLX5_VSC_LOCK,
-};
-
 enum {
        VSC_CTRL_OFFSET = 0x4,
        VSC_COUNTER_OFFSET = 0x8,
@@ -281,3 +276,38 @@ int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data,
        }
        return length;
 }
+
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+                          enum mlx5_vsc_state state)
+{
+       u32 data, id = 0;
+       int ret;
+
+       ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL);
+       if (ret) {
+               mlx5_core_warn(dev, "Failed to set gw space %d\n", ret);
+               return ret;
+       }
+
+       if (state == MLX5_VSC_LOCK) {
+               /* Lock only: use the counter value as a unique ID (unlock keeps 0) */
+               ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id);
+               if (ret)
+                       return ret;
+       }
+
+       /* Write the ID to the semaphore at 'space' to take or release it */
+       ret = mlx5_vsc_gw_write(dev, space, id);
+       if (ret)
+               return ret;
+
+       /* Read back; a value other than our ID yields -EBUSY below */
+       ret = mlx5_vsc_gw_read(dev, space, &data);
+       if (ret)
+               return -EINVAL;
+
+       if (data != id)
+               return -EBUSY;
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
index c6ebf59006c5..4264b65f7437 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h
@@ -4,6 +4,11 @@
 #ifndef __MLX5_PCI_VSC_H__
 #define __MLX5_PCI_VSC_H__
 
+enum mlx5_vsc_state {
+       MLX5_VSC_UNLOCK, /* mlx5_vsc_sem_set_space() writes 0 to the semaphore */
+       MLX5_VSC_LOCK, /* mlx5_vsc_sem_set_space() writes a counter-based ID */
+};
+
 enum {
        MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7,
 };
@@ -22,4 +27,7 @@ static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev)
        return !!dev->vsc_addr;
 }
 
+int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space,
+                          enum mlx5_vsc_state state);
+
 #endif /* __MLX5_PCI_VSC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index d31b77ad533d..439cf23945a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -111,6 +111,10 @@ enum {
        MLX5_DRIVER_SYND = 0xbadd00de,
 };
 
+enum mlx5_semaphore_space_address {
+       MLX5_SEMAPHORE_SPACE_DOMAIN     = 0xA, /* gw space set by mlx5_vsc_sem_set_space() */
+};
+
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
-- 
2.20.1

Reply via email to