From: Feras Daoud <fera...@mellanox.com> Since the FW can be shared between different PFs/VFs it is common that more than one health poll will detected a failure, this can lead to multiple resets which are unneeded.
The solution is to use a FW locking mechanism using semaphore space to provide a way to allow only one device to collect the cr-dump and to issue a sw-reset. Signed-off-by: Feras Daoud <fera...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> Reviewed-by: Alex Vesker <va...@mellanox.com> Signed-off-by: Saeed Mahameed <sae...@mellanox.com> --- .../ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 40 ++++++++++++++++--- .../ethernet/mellanox/mlx5/core/lib/pci_vsc.h | 8 ++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 4 ++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c index f42890bdd6b1..b6b8fb13f621 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -24,11 +24,6 @@ pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) #define VSC_MAX_RETRIES 2048 -enum mlx5_vsc_state { - MLX5_VSC_UNLOCK, - MLX5_VSC_LOCK, -}; - enum { VSC_CTRL_OFFSET = 0x4, VSC_COUNTER_OFFSET = 0x8, @@ -281,3 +276,38 @@ int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, } return length; } + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h index c6ebf59006c5..4264b65f7437 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -4,6 +4,11 @@ #ifndef __MLX5_PCI_VSC_H__ #define __MLX5_PCI_VSC_H__ +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + enum { MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, }; @@ -22,4 +27,7 @@ static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) return !!dev->vsc_addr; } +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + #endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index d31b77ad533d..439cf23945a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -111,6 +111,10 @@ enum { MLX5_DRIVER_SYND = 0xbadd00de, }; +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); -- 2.20.1