The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=a8e715d21b963251e449187c98292fff77dc7576

commit a8e715d21b963251e449187c98292fff77dc7576
Author:     Hans Petter Selasky <hsela...@freebsd.org>
AuthorDate: 2022-02-01 15:20:12 +0000
Commit:     Hans Petter Selasky <hsela...@freebsd.org>
CommitDate: 2022-02-01 15:21:16 +0000

    mlx5en: Add race protection for SQ remap
    
    Add a refcount for posted WQEs to avoid a race between
    post WQE and FW command flows.
    
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 sys/dev/mlx5/mlx5_en/en_rl.h      |  1 +
 sys/dev/mlx5/mlx5_en/mlx5_en_rl.c | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/sys/dev/mlx5/mlx5_en/en_rl.h b/sys/dev/mlx5/mlx5_en/en_rl.h
index 1d7f7afc487d..5d4cb7365f2a 100644
--- a/sys/dev/mlx5/mlx5_en/en_rl.h
+++ b/sys/dev/mlx5/mlx5_en/en_rl.h
@@ -136,6 +136,7 @@ struct mlx5e_rl_channel {
        uint64_t new_rate;
        uint64_t init_rate;
        uint64_t last_rate;
+       uint32_t refcount;
        uint16_t last_burst;
        uint16_t state;
 };
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
index 6fe4bd344710..09ff0143e430 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -424,7 +424,8 @@ mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate
 }
 
 static int
-mlx5e_rl_post_sq_remap_wqe(struct mlx5e_iq *iq, u32 scq_handle, u32 sq_handle)
+mlx5e_rl_post_sq_remap_wqe(struct mlx5e_iq *iq, u32 scq_handle, u32 sq_handle,
+    struct mlx5e_rl_channel *sq_channel)
 {
        const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_qos_remap_wqe),
                    MLX5_SEND_WQE_DS);
@@ -454,6 +455,8 @@ mlx5e_rl_post_sq_remap_wqe(struct mlx5e_iq *iq, u32 scq_handle, u32 sq_handle)
        memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 
        iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+       iq->data[pi].p_refcount = &sq_channel->refcount;
+       atomic_add_int(iq->data[pi].p_refcount, 1);
        iq->pc += iq->data[pi].num_wqebbs;
 
        mlx5e_iq_notify_hw(iq);
@@ -464,7 +467,8 @@ mlx5e_rl_post_sq_remap_wqe(struct mlx5e_iq *iq, u32 scq_handle, u32 sq_handle)
 }
 
 static int
-mlx5e_rl_remap_sq(struct mlx5e_sq *sq, uint16_t index)
+mlx5e_rl_remap_sq(struct mlx5e_sq *sq, uint16_t index,
+    struct mlx5e_rl_channel *sq_channel)
 {
        struct mlx5e_channel *iq_channel;
        u32     scq_handle;
@@ -477,10 +481,12 @@ mlx5e_rl_remap_sq(struct mlx5e_sq *sq, uint16_t index)
        sq_handle = sq->queue_handle;
        scq_handle = mlx5_rl_get_scq_handle(sq->priv->mdev, index);
 
-       if (sq_handle == -1U || scq_handle == -1U)
+       if (sq_handle == MLX5_INVALID_QUEUE_HANDLE ||
+           scq_handle == MLX5_INVALID_QUEUE_HANDLE)
                error = -1;
        else
-               error = mlx5e_rl_post_sq_remap_wqe(&iq_channel->iq, scq_handle, sq_handle);
+               error = mlx5e_rl_post_sq_remap_wqe(&iq_channel->iq, scq_handle,
+                   sq_handle, sq_channel);
 
        return (error);
 }
@@ -568,7 +574,11 @@ mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
        /* set new rate, if SQ is running */
        sq = channel->sq;
        if (sq != NULL && READ_ONCE(sq->running) != 0) {
-               if (!use_sq_remap || mlx5e_rl_remap_sq(sq, index)) {
+               if (!use_sq_remap || mlx5e_rl_remap_sq(sq, index, channel)) {
+                       while (atomic_load_int(&channel->refcount) != 0 &&
+                           rlw->priv->mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+                           pci_channel_offline(rlw->priv->mdev->pdev) == 0)
+                               pause("W", 1);
                        error = mlx5e_rl_modify_sq(sq, index);
                        if (error != 0)
                                
atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);

Reply via email to