Author: hselasky
Date: Wed May  8 10:23:33 2019
New Revision: 347246
URL: https://svnweb.freebsd.org/changeset/base/347246

Log:
  Add support for Dynamic Interrupt Moderation, DIM, in mlx5en(4).
  
  Add support for DIM based on Linux,
  with some minor adaptions specific to FreeBSD.
  
  Linux commit
  f97c3dc3c0e8d23a5c4357d182afeef4c67f5c33
  
  MFC after:    3 days
  Sponsored by: Mellanox Technologies

Added:
  head/sys/compat/linuxkpi/common/include/linux/net_dim.h   (contents, props changed)
  head/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/dev/mlx5/mlx5_en/en.h
  head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
  head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
  head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
  head/sys/modules/mlx5en/Makefile

Added: head/sys/compat/linuxkpi/common/include/linux/net_dim.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/compat/linuxkpi/common/include/linux/net_dim.h     Wed May  8 10:23:33 2019        (r347246)
@@ -0,0 +1,410 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
+ *
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018, Broadcom Limited. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+/* This file implements Dynamic Interrupt Moderation, DIM */
+
+#ifndef NET_DIM_H
+#define        NET_DIM_H
+
+#include <asm/types.h>
+
+#include <linux/workqueue.h>
+#include <linux/ktime.h>
+
+struct net_dim_cq_moder {
+       u16     usec;
+       u16     pkts;
+       u8      cq_period_mode;
+};
+
+struct net_dim_sample {
+       ktime_t time;
+       u32     pkt_ctr;
+       u32     byte_ctr;
+       u16     event_ctr;
+};
+
+struct net_dim_stats {
+       int     ppms;                   /* packets per msec */
+       int     bpms;                   /* bytes per msec */
+       int     epms;                   /* events per msec */
+};
+
+struct net_dim {                       /* Adaptive Moderation */
+       u8      state;
+       struct net_dim_stats prev_stats;
+       struct net_dim_sample start_sample;
+       struct work_struct work;
+       u16     event_ctr;
+       u8      profile_ix;
+       u8      mode;
+       u8      tune_state;
+       u8      steps_right;
+       u8      steps_left;
+       u8      tired;
+};
+
+enum {
+       NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
+       NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
+       NET_DIM_CQ_PERIOD_NUM_MODES = 0x2,
+       NET_DIM_CQ_PERIOD_MODE_DISABLED = 0xFF,
+};
+
+/* Adaptive moderation logic */
+enum {
+       NET_DIM_START_MEASURE,
+       NET_DIM_MEASURE_IN_PROGRESS,
+       NET_DIM_APPLY_NEW_PROFILE,
+};
+
+enum {
+       NET_DIM_PARKING_ON_TOP,
+       NET_DIM_PARKING_TIRED,
+       NET_DIM_GOING_RIGHT,
+       NET_DIM_GOING_LEFT,
+};
+
+enum {
+       NET_DIM_STATS_WORSE,
+       NET_DIM_STATS_SAME,
+       NET_DIM_STATS_BETTER,
+};
+
+enum {
+       NET_DIM_STEPPED,
+       NET_DIM_TOO_TIRED,
+       NET_DIM_ON_EDGE,
+};
+
+#define        NET_DIM_PARAMS_NUM_PROFILES 5
+/* Adaptive moderation profiles */
+#define        NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define        NET_DIM_DEF_PROFILE_CQE 1
+#define        NET_DIM_DEF_PROFILE_EQE 1
+
+/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
+#define        NET_DIM_EQE_PROFILES { \
+       {1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+       {8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+       {64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+       {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+       {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+}
+
+#define        NET_DIM_CQE_PROFILES { \
+       {2,  256},             \
+       {8,  128},             \
+       {16, 64},              \
+       {32, 64},              \
+       {64, 64}               \
+}
+
+static const struct net_dim_cq_moder
+       net_dim_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+       NET_DIM_EQE_PROFILES,
+       NET_DIM_CQE_PROFILES,
+};
+
+static inline struct net_dim_cq_moder
+net_dim_get_profile(u8 cq_period_mode,
+    int ix)
+{
+       struct net_dim_cq_moder cq_moder;
+
+       cq_moder = net_dim_profile[cq_period_mode][ix];
+       cq_moder.cq_period_mode = cq_period_mode;
+       return cq_moder;
+}
+
+static inline struct net_dim_cq_moder
+net_dim_get_def_profile(u8 rx_cq_period_mode)
+{
+       int default_profile_ix;
+
+       if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE)
+               default_profile_ix = NET_DIM_DEF_PROFILE_CQE;
+       else    /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */
+               default_profile_ix = NET_DIM_DEF_PROFILE_EQE;
+
+       return net_dim_get_profile(rx_cq_period_mode, default_profile_ix);
+}
+
+static inline bool
+net_dim_on_top(struct net_dim *dim)
+{
+       switch (dim->tune_state) {
+       case NET_DIM_PARKING_ON_TOP:
+       case NET_DIM_PARKING_TIRED:
+               return true;
+       case NET_DIM_GOING_RIGHT:
+               return (dim->steps_left > 1) && (dim->steps_right == 1);
+       default:        /* NET_DIM_GOING_LEFT */
+               return (dim->steps_right > 1) && (dim->steps_left == 1);
+       }
+}
+
+static inline void
+net_dim_turn(struct net_dim *dim)
+{
+       switch (dim->tune_state) {
+       case NET_DIM_PARKING_ON_TOP:
+       case NET_DIM_PARKING_TIRED:
+               break;
+       case NET_DIM_GOING_RIGHT:
+               dim->tune_state = NET_DIM_GOING_LEFT;
+               dim->steps_left = 0;
+               break;
+       case NET_DIM_GOING_LEFT:
+               dim->tune_state = NET_DIM_GOING_RIGHT;
+               dim->steps_right = 0;
+               break;
+       }
+}
+
+static inline int
+net_dim_step(struct net_dim *dim)
+{
+       if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
+               return NET_DIM_TOO_TIRED;
+
+       switch (dim->tune_state) {
+       case NET_DIM_PARKING_ON_TOP:
+       case NET_DIM_PARKING_TIRED:
+               break;
+       case NET_DIM_GOING_RIGHT:
+               if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
+                       return NET_DIM_ON_EDGE;
+               dim->profile_ix++;
+               dim->steps_right++;
+               break;
+       case NET_DIM_GOING_LEFT:
+               if (dim->profile_ix == 0)
+                       return NET_DIM_ON_EDGE;
+               dim->profile_ix--;
+               dim->steps_left++;
+               break;
+       }
+
+       dim->tired++;
+       return NET_DIM_STEPPED;
+}
+
+static inline void
+net_dim_park_on_top(struct net_dim *dim)
+{
+       dim->steps_right = 0;
+       dim->steps_left = 0;
+       dim->tired = 0;
+       dim->tune_state = NET_DIM_PARKING_ON_TOP;
+}
+
+static inline void
+net_dim_park_tired(struct net_dim *dim)
+{
+       dim->steps_right = 0;
+       dim->steps_left = 0;
+       dim->tune_state = NET_DIM_PARKING_TIRED;
+}
+
+static inline void
+net_dim_exit_parking(struct net_dim *dim)
+{
+       dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT :
+       NET_DIM_GOING_RIGHT;
+       net_dim_step(dim);
+}
+
+#define        IS_SIGNIFICANT_DIFF(val, ref) \
+       (((100UL * abs((val) - (ref))) / (ref)) > 10)   /* more than 10%
+                                                        * difference */
+
+static inline int
+net_dim_stats_compare(struct net_dim_stats *curr,
+    struct net_dim_stats *prev)
+{
+       if (!prev->bpms)
+               return curr->bpms ? NET_DIM_STATS_BETTER :
+               NET_DIM_STATS_SAME;
+
+       if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+               return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER :
+                   NET_DIM_STATS_WORSE;
+
+       if (!prev->ppms)
+               return curr->ppms ? NET_DIM_STATS_BETTER :
+                   NET_DIM_STATS_SAME;
+
+       if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+               return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER :
+                   NET_DIM_STATS_WORSE;
+
+       if (!prev->epms)
+               return NET_DIM_STATS_SAME;
+
+       if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
+               return (curr->epms < prev->epms) ? NET_DIM_STATS_BETTER :
+                   NET_DIM_STATS_WORSE;
+
+       return NET_DIM_STATS_SAME;
+}
+
+static inline bool
+net_dim_decision(struct net_dim_stats *curr_stats,
+    struct net_dim *dim)
+{
+       int prev_state = dim->tune_state;
+       int prev_ix = dim->profile_ix;
+       int stats_res;
+       int step_res;
+
+       switch (dim->tune_state) {
+       case NET_DIM_PARKING_ON_TOP:
+               stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
+               if (stats_res != NET_DIM_STATS_SAME)
+                       net_dim_exit_parking(dim);
+               break;
+
+       case NET_DIM_PARKING_TIRED:
+               dim->tired--;
+               if (!dim->tired)
+                       net_dim_exit_parking(dim);
+               break;
+
+       case NET_DIM_GOING_RIGHT:
+       case NET_DIM_GOING_LEFT:
+               stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
+               if (stats_res != NET_DIM_STATS_BETTER)
+                       net_dim_turn(dim);
+
+               if (net_dim_on_top(dim)) {
+                       net_dim_park_on_top(dim);
+                       break;
+               }
+               step_res = net_dim_step(dim);
+               switch (step_res) {
+               case NET_DIM_ON_EDGE:
+                       net_dim_park_on_top(dim);
+                       break;
+               case NET_DIM_TOO_TIRED:
+                       net_dim_park_tired(dim);
+                       break;
+               }
+
+               break;
+       }
+
+       if ((prev_state != NET_DIM_PARKING_ON_TOP) ||
+           (dim->tune_state != NET_DIM_PARKING_ON_TOP))
+               dim->prev_stats = *curr_stats;
+
+       return dim->profile_ix != prev_ix;
+}
+
+static inline void
+net_dim_sample(u16 event_ctr,
+    u64 packets,
+    u64 bytes,
+    struct net_dim_sample *s)
+{
+       s->time = ktime_get();
+       s->pkt_ctr = packets;
+       s->byte_ctr = bytes;
+       s->event_ctr = event_ctr;
+}
+
+#define        NET_DIM_NEVENTS 64
+#define        BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
+
+static inline void
+net_dim_calc_stats(struct net_dim_sample *start,
+    struct net_dim_sample *end,
+    struct net_dim_stats *curr_stats)
+{
+       /* u32 holds up to 71 minutes, should be enough */
+       u32 delta_us = ktime_us_delta(end->time, start->time);
+       u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+       u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+           start->byte_ctr);
+
+       if (!delta_us)
+               return;
+
+       curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+       curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+       curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC,
+           delta_us);
+}
+
+static inline void
+net_dim(struct net_dim *dim,
+    u64 packets, u64 bytes)
+{
+       struct net_dim_stats curr_stats;
+       struct net_dim_sample end_sample;
+       u16 nevents;
+
+       dim->event_ctr++;
+
+       switch (dim->state) {
+       case NET_DIM_MEASURE_IN_PROGRESS:
+               nevents = BIT_GAP(BITS_PER_TYPE(u16),
+                   dim->event_ctr,
+                   dim->start_sample.event_ctr);
+               if (nevents < NET_DIM_NEVENTS)
+                       break;
+               net_dim_sample(dim->event_ctr, packets, bytes, &end_sample);
+               net_dim_calc_stats(&dim->start_sample, &end_sample,
+                   &curr_stats);
+               if (net_dim_decision(&curr_stats, dim)) {
+                       dim->state = NET_DIM_APPLY_NEW_PROFILE;
+                       schedule_work(&dim->work);
+                       break;
+               }
+               /* FALLTHROUGH */
+       case NET_DIM_START_MEASURE:
+               net_dim_sample(dim->event_ctr, packets, bytes, &dim->start_sample);
+               dim->state = NET_DIM_MEASURE_IN_PROGRESS;
+               break;
+       case NET_DIM_APPLY_NEW_PROFILE:
+               break;
+       default:
+               break;
+       }
+}
+
+#endif                                 /* NET_DIM_H */

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Wed May  8 09:03:43 2019        (r347245)
+++ head/sys/conf/files Wed May  8 10:23:33 2019        (r347246)
@@ -4764,6 +4764,8 @@ dev/mlx5/mlx5_core/mlx5_wq.c                      optional mlx5 pci       \
 dev/mlx5/mlx5_lib/mlx5_gid.c                   optional mlx5 pci       \
        compile-with "${OFED_C}"
 
+dev/mlx5/mlx5_en/mlx5_en_dim.c                 optional mlx5en pci inet inet6  \
+       compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_ethtool.c             optional mlx5en pci inet inet6  \
        compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_main.c                        optional mlx5en pci inet inet6  \

Modified: head/sys/dev/mlx5/mlx5_en/en.h
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/en.h      Wed May  8 09:03:43 2019        (r347245)
+++ head/sys/dev/mlx5/mlx5_en/en.h      Wed May  8 10:23:33 2019        (r347246)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -38,6 +38,8 @@
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ktime.h>
+#include <linux/net_dim.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
@@ -95,6 +97,8 @@
 #define        MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \
     MIN(65535, MLX5E_MAX_RX_SEGS * MLX5E_MAX_RX_BYTES)
 #endif
+#define        MLX5E_DIM_DEFAULT_PROFILE 3
+#define        MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO    16
 #define        MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC      0x10
 #define        MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE     0x3
 #define        MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS      0x20
@@ -472,7 +476,7 @@ struct mlx5e_params {
   m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \
   m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \
   m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \
-  m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+  m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE fixed mode 1: CQE fixed mode 2: EQE auto mode 3: CQE auto mode") \
   m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \
   m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \
   m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
@@ -562,6 +566,9 @@ struct mlx5e_rq {
        volatile int enabled;
        int     ix;
 
+       /* Dynamic Interrupt Moderation */
+       struct net_dim dim;
+
        /* control */
        struct mlx5_wq_ctrl wq_ctrl;
        u32     rqn;
@@ -881,6 +888,9 @@ void        mlx5e_cq_error_event(struct mlx5_core_cq *mcq, in
 void   mlx5e_rx_cq_comp(struct mlx5_core_cq *);
 void   mlx5e_tx_cq_comp(struct mlx5_core_cq *);
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
+
+void   mlx5e_dim_work(struct work_struct *);
+void   mlx5e_dim_build_cq_param(struct mlx5e_priv *, struct mlx5e_cq_param *);
 
 int    mlx5e_open_flow_table(struct mlx5e_priv *priv);
 void   mlx5e_close_flow_table(struct mlx5e_priv *priv);

Added: head/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c     Wed May  8 10:23:33 2019        (r347246)
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+void
+mlx5e_dim_build_cq_param(struct mlx5e_priv *priv,
+    struct mlx5e_cq_param *param)
+{
+       struct net_dim_cq_moder prof;
+       void *cqc = param->cqc;
+
+       if (priv->params.rx_cq_moderation_mode < 2)
+               return;
+
+       switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
+       case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+               prof = net_dim_profile[NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE]
+                   [NET_DIM_DEF_PROFILE_CQE];
+               MLX5_SET(cqc, cqc, cq_period, prof.usec);
+               MLX5_SET(cqc, cqc, cq_max_count, prof.pkts);
+               break;
+
+       case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+               prof = net_dim_profile[NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE]
+                   [NET_DIM_DEF_PROFILE_EQE];
+               MLX5_SET(cqc, cqc, cq_period, prof.usec);
+               MLX5_SET(cqc, cqc, cq_max_count, prof.pkts);
+               break;
+       default:
+               break;
+       }
+}
+
+void
+mlx5e_dim_work(struct work_struct *work)
+{
+       struct net_dim *dim = container_of(work, struct net_dim, work);
+       struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
+       struct mlx5e_channel *c = container_of(rq, struct mlx5e_channel, rq);
+       struct net_dim_cq_moder cur_profile;
+       u8 profile_ix;
+       u8 mode;
+
+       /* copy current auto moderation settings and set new state */
+       mtx_lock(&rq->mtx);
+       profile_ix = dim->profile_ix;
+       mode = dim->mode;
+       dim->state = NET_DIM_START_MEASURE;
+       mtx_unlock(&rq->mtx);
+
+       /* check for invalid mode */
+       if (mode == 255)
+               return;
+
+       /* get current profile */
+       cur_profile = net_dim_profile[mode][profile_ix];
+
+       /* apply LRO restrictions */
+       if (c->priv->params.hw_lro_en &&
+           cur_profile.pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
+               cur_profile.pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
+       }
+
+       /* modify CQ */
+       mlx5_core_modify_cq_moderation(c->priv->mdev, &rq->cq.mcq,
+           cur_profile.usec, cur_profile.pkts);
+}

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c Wed May  8 09:03:43 2019        (r347245)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c Wed May  8 10:23:33 2019        (r347246)
@@ -629,8 +629,8 @@ mlx5e_ethtool_handler(SYSCTL_HANDLER_ARGS)
                        mlx5e_close_locked(priv->ifp);
 
                /* import RX coalesce mode */
-               if (priv->params_ethtool.rx_coalesce_mode != 0)
-                       priv->params_ethtool.rx_coalesce_mode = 1;
+               if (priv->params_ethtool.rx_coalesce_mode > 3)
+                       priv->params_ethtool.rx_coalesce_mode = 3;
                priv->params.rx_cq_moderation_mode =
                    priv->params_ethtool.rx_coalesce_mode;
 

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c    Wed May  8 09:03:43 2019        (r347245)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c    Wed May  8 10:23:33 2019        (r347246)
@@ -915,6 +915,26 @@ mlx5e_create_rq(struct mlx5e_channel *c,
 #endif
        }
 
+       INIT_WORK(&rq->dim.work, mlx5e_dim_work);
+       if (priv->params.rx_cq_moderation_mode < 2) {
+               rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
+       } else {
+               void *cqc = container_of(param,
+                   struct mlx5e_channel_param, rq)->rx_cq.cqc;
+
+               switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
+               case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+                       rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+                       break;
+               case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+                       rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+                       break;
+               default:
+                       rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
+                       break;
+               }
+       }
+
        rq->ifp = c->tag.m_snd_tag.ifp;
        rq->channel = c;
        rq->ix = c->ix;
@@ -1116,6 +1136,7 @@ mlx5e_close_rq_wait(struct mlx5e_rq *rq)
                rq->cq.mcq.comp(&rq->cq.mcq);
        }
 
+       cancel_work_sync(&rq->dim.work);
        mlx5e_disable_rq(rq);
        mlx5e_destroy_rq(rq);
 }
@@ -1916,9 +1937,23 @@ mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 }
 
 static void
+mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
+{
+
+       *ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
+
+       /* apply LRO restrictions */
+       if (priv->params.hw_lro_en &&
+           ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
+               ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
+       }
+}
+
+static void
 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
     struct mlx5e_cq_param *param)
 {
+       struct net_dim_cq_moder curr;
        void *cqc = param->cqc;
 
 
@@ -1932,21 +1967,42 @@ mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
        }
 
        MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
-       MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
-       MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 
        switch (priv->params.rx_cq_moderation_mode) {
        case 0:
+               MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
+               MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
                MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
                break;
-       default:
+       case 1:
+               MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
+               MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
                if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
                        MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
                else
                        MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
                break;
+       case 2:
+               mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
+               MLX5_SET(cqc, cqc, cq_period, curr.usec);
+               MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
+               MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+               break;
+       case 3:
+               mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
+               MLX5_SET(cqc, cqc, cq_period, curr.usec);
+               MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
+               if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+                       MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+               else
+                       MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+               break;
+       default:
+               break;
        }
 
+       mlx5e_dim_build_cq_param(priv, param);
+
        mlx5e_build_common_cq_param(priv, param);
 }
 
@@ -2037,6 +2093,7 @@ mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struc
 
                switch (priv->params.tx_cq_moderation_mode) {
                case 0:
+               case 2:
                        cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
                        break;
                default:
@@ -2061,22 +2118,49 @@ mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struc
 
        if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
                uint8_t cq_mode;
+               uint8_t dim_mode;
                int retval;
 
                switch (priv->params.rx_cq_moderation_mode) {
                case 0:
+               case 2:
                        cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+                       dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
                        break;
                default:
                        cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
+                       dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
                        break;
                }
 
-               retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
-                   priv->params.rx_cq_moderation_usec,
-                   priv->params.rx_cq_moderation_pkts,
-                   cq_mode);
+               /* tear down dynamic interrupt moderation */
+               mtx_lock(&rq->mtx);
+               rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
+               mtx_unlock(&rq->mtx);
 
+               /* wait for dynamic interrupt moderation work task, if any */
+               cancel_work_sync(&rq->dim.work);
+
+               if (priv->params.rx_cq_moderation_mode >= 2) {
+                       struct net_dim_cq_moder curr;
+
+                       mlx5e_get_default_profile(priv, dim_mode, &curr);
+
+                       retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
+                           curr.usec, curr.pkts, cq_mode);
+
+                       /* set dynamic interrupt moderation mode and zero defaults */
+                       mtx_lock(&rq->mtx);
+                       rq->dim.mode = dim_mode;
+                       rq->dim.state = 0;
+                       rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
+                       mtx_unlock(&rq->mtx);
+               } else {
+                       retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
+                           priv->params.rx_cq_moderation_usec,
+                           priv->params.rx_cq_moderation_pkts,
+                           cq_mode);
+               }
                return (retval);
        }
 

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c      Wed May  8 09:03:43 2019        (r347245)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c      Wed May  8 10:23:33 2019        (r347246)
@@ -585,6 +585,9 @@ mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
                mlx5e_post_rx_wqes(rq);
        }
        mlx5e_post_rx_wqes(rq);
+       /* check for dynamic interrupt moderation callback */
+       if (rq->dim.mode != NET_DIM_CQ_PERIOD_MODE_DISABLED)
+               net_dim(&rq->dim, rq->stats.packets, rq->stats.bytes);
        mlx5e_cq_arm(&rq->cq, MLX5_GET_DOORBELL_LOCK(&rq->channel->priv->doorbell_lock));
        tcp_lro_flush_all(&rq->lro);
        mtx_unlock(&rq->mtx);

Modified: head/sys/modules/mlx5en/Makefile
==============================================================================
--- head/sys/modules/mlx5en/Makefile    Wed May  8 09:03:43 2019        (r347245)
+++ head/sys/modules/mlx5en/Makefile    Wed May  8 10:23:33 2019        (r347246)
@@ -3,6 +3,7 @@
 
 KMOD=mlx5en
 SRCS= \
+mlx5_en_dim.c \
 mlx5_en_ethtool.c \
 mlx5_en_main.c \
 mlx5_en_tx.c \
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to