From: Itamar Gozlan <igoz...@nvidia.com> Currently, when a firmware (FW) failure occurs during matcher error flow, the system attempts to reconnect the matcher, leading to a segmentation fault. This happens because the matcher is freed but remains in the list.
Example scenario: Given matchers M1->M2->M3, if a FW failure occurs: 1. System tries to destroy M1 and fails 2. M1 remains in the list but is freed 3. When destroying M2, it attempts to remove itself and create M1->M3 4. This results in a segmentation fault as M1 is already freed Signed-off-by: Itamar Gozlan <igoz...@nvidia.com> --- drivers/net/mlx5/hws/mlx5dr_matcher.c | 31 +++++++++++---------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/net/mlx5/hws/mlx5dr_matcher.c b/drivers/net/mlx5/hws/mlx5dr_matcher.c index 54460cc82b..c9922546fe 100644 --- a/drivers/net/mlx5/hws/mlx5dr_matcher.c +++ b/drivers/net/mlx5/hws/mlx5dr_matcher.c @@ -290,8 +290,8 @@ static int mlx5dr_matcher_connect(struct mlx5dr_matcher *matcher) static int mlx5dr_matcher_disconnect(struct mlx5dr_matcher *matcher) { - struct mlx5dr_matcher *tmp_matcher, *prev_matcher; struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_matcher *tmp_matcher; struct mlx5dr_devx_obj *prev_ft; struct mlx5dr_matcher *next; int ret; @@ -302,13 +302,11 @@ static int mlx5dr_matcher_disconnect(struct mlx5dr_matcher *matcher) } prev_ft = tbl->ft; - prev_matcher = LIST_FIRST(&tbl->head); LIST_FOREACH(tmp_matcher, &tbl->head, next) { if (tmp_matcher == matcher) break; prev_ft = tmp_matcher->end_ft; - prev_matcher = tmp_matcher; } next = matcher->next.le_next; @@ -322,21 +320,21 @@ static int mlx5dr_matcher_disconnect(struct mlx5dr_matcher *matcher) next->match_ste.rtc_0, next->match_ste.rtc_1); if (ret) { - DR_LOG(ERR, "Failed to disconnect matcher"); - goto matcher_reconnect; + DR_LOG(ERR, "Fatal: failed to disconnect matcher"); + return ret; } } else { ret = mlx5dr_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl, true); if (ret) { - DR_LOG(ERR, "Failed to disconnect last matcher"); - goto matcher_reconnect; + DR_LOG(ERR, "Fatal: failed to disconnect last matcher"); + return ret; } } ret = mlx5dr_matcher_shared_update_local_ft(tbl); if (ret) { - DR_LOG(ERR, "Failed to update local_ft in shared table"); - goto matcher_reconnect; + DR_LOG(ERR, "Fatal: failed to update local_ft in shared table"); + return ret; } /* Removing first matcher, update connected miss tables if exists */ @@ -344,25 +342,20 @@ static int mlx5dr_matcher_disconnect(struct mlx5dr_matcher *matcher) ret = mlx5dr_table_update_connected_miss_tables(tbl); if (ret) { DR_LOG(ERR, "Fatal error, failed to update connected miss table"); - goto matcher_reconnect; + return ret; } } ret = mlx5dr_table_ft_set_default_next_ft(tbl, prev_ft); if (ret) { DR_LOG(ERR, "Fatal error, failed to restore matcher ft default miss"); - goto matcher_reconnect; + return ret; } + /* Failure to restore/modify FW results in a critical, unrecoverable error. + * Error handling is not applicable in this fatal scenario. + */ return 0; - -matcher_reconnect: - if (LIST_EMPTY(&tbl->head) || prev_matcher == matcher) - LIST_INSERT_HEAD(&matcher->tbl->head, matcher, next); - else - LIST_INSERT_AFTER(prev_matcher, matcher, next); - - return ret; } static bool mlx5dr_matcher_supp_fw_wqe(struct mlx5dr_matcher *matcher) -- 2.45.2