Author: hselasky
Date: Tue Aug 14 11:24:14 2018
New Revision: 337743
URL: https://svnweb.freebsd.org/changeset/base/337743

Log:
  Enter error state when handling bad device in mlx5core and add checks
  for error state to mlx5en(4) to make live migration work.
  
  This is a direct commit.
  
  Sponsored by: Mellanox Technologies

Modified:
  stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c
  stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c

Modified: stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c
==============================================================================
--- stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c      Tue Aug 14 11:19:04 2018        (r337742)
+++ stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c      Tue Aug 14 11:24:14 2018        (r337743)
@@ -56,10 +56,13 @@ static void health_care(struct work_struct *work)
                priv = container_of(health, struct mlx5_priv, health);
                dev = container_of(priv, struct mlx5_core_dev, priv);
                mlx5_core_warn(dev, "handling bad device here\n");
-               /* nothing yet */
+
                spin_lock_irq(&health_lock);
                list_del_init(&health->list);
                spin_unlock_irq(&health_lock);
+
+               /* enter error state */
+               mlx5_enter_error_state(dev);
        }
 }
 

Modified: stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c       Tue Aug 14 11:19:04 2018        (r337742)
+++ stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c       Tue Aug 14 11:24:14 2018        (r337743)
@@ -921,8 +921,11 @@ mlx5e_close_rq(struct mlx5e_rq *rq)
 static void
 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
 {
+       struct mlx5_core_dev *mdev = rq->channel->priv->mdev;
+
        /* wait till RQ is empty */
-       while (!mlx5_wq_ll_is_empty(&rq->wq)) {
+       while (!mlx5_wq_ll_is_empty(&rq->wq) &&
+               (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
                msleep(4);
                rq->cq.mcq.comp(&rq->cq.mcq);
        }
@@ -1264,6 +1267,7 @@ void
 mlx5e_drain_sq(struct mlx5e_sq *sq)
 {
        int error;
+       struct mlx5_core_dev *mdev = sq->priv->mdev;
 
        /*
         * Check if already stopped.
@@ -1296,7 +1300,8 @@ mlx5e_drain_sq(struct mlx5e_sq *sq)
        /* wait till SQ is empty or link is down */
        mtx_lock(&sq->lock);
        while (sq->cc != sq->pc &&
-           (sq->priv->media_status_last & IFM_ACTIVE) != 0) {
+           (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
+           mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
@@ -1313,7 +1318,8 @@ mlx5e_drain_sq(struct mlx5e_sq *sq)
 
        /* wait till SQ is empty */
        mtx_lock(&sq->lock);
-       while (sq->cc != sq->pc) {
+       while (sq->cc != sq->pc &&
+           mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mtx_unlock(&sq->lock);
                msleep(1);
                sq->cq.mcq.comp(&sq->cq.mcq);
_______________________________________________
svn-src-stable-10@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10
To unsubscribe, send any mail to "svn-src-stable-10-unsubscribe@freebsd.org"

Reply via email to