From: Cosmin Ratiu <[email protected]>

Up to now, rate groups could only contain vports from the same E-Switch.
This patch relaxes that restriction if the device supports it
(HCA_CAP.esw_cross_esw_sched == true) and the right conditions are met:
- Link Aggregation (LAG) is enabled.
- The E-Switches are from the same shared devlink device.

Signed-off-by: Cosmin Ratiu <[email protected]>
Reviewed-by: Carolina Jubran <[email protected]>
Signed-off-by: Tariq Toukan <[email protected]>
---
 .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 117 +++++++++++++-----
 1 file changed, 83 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index f67f99428959..a3d511367297 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -50,7 +50,9 @@ struct mlx5_esw_sched_node {
        enum sched_node_type type;
        /* The eswitch this node belongs to. */
        struct mlx5_eswitch *esw;
-       /* The children nodes of this node, empty list for leaf nodes. */
+       /* The children nodes of this node, empty list for leaf nodes.
+        * Can be from multiple E-Switches.
+        */
        struct list_head children;
        /* Valid only if this node is associated with a vport. */
        struct mlx5_vport *vport;
@@ -393,6 +395,7 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
        struct mlx5_esw_sched_node *parent = vport_node->parent;
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_core_dev *dev = vport_node->esw->dev;
+       struct mlx5_vport *vport = vport_node->vport;
        void *attr;
 
        if (!mlx5_qos_element_type_supported(
@@ -404,10 +407,17 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
        MLX5_SET(scheduling_context, sched_ctx, element_type,
                 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
        attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
-       MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
+       MLX5_SET(vport_element, attr, vport_number, vport->vport);
        MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent->ix);
        MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
                 vport_node->max_rate);
+       if (vport->dev != dev) {
+               /* The port is assigned to a node on another eswitch. */
+               MLX5_SET(vport_element, attr, eswitch_owner_vhca_id_valid,
+                        true);
+               MLX5_SET(vport_element, attr, eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(vport->dev, vhca_id));
+       }
 
        return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
 }
@@ -419,6 +429,7 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
 {
        u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
        struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
+       struct mlx5_vport *vport = vport_tc_node->vport;
        void *attr;
 
        if (!mlx5_qos_element_type_supported(
@@ -430,8 +441,7 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
        MLX5_SET(scheduling_context, sched_ctx, element_type,
                 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
        attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
-       MLX5_SET(vport_tc_element, attr, vport_number,
-                vport_tc_node->vport->vport);
+       MLX5_SET(vport_tc_element, attr, vport_number, vport->vport);
        MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
        MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
                 rate_limit_elem_ix);
@@ -439,6 +449,13 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
                 vport_tc_node->parent->ix);
        MLX5_SET(scheduling_context, sched_ctx, bw_share,
                 vport_tc_node->bw_share);
+       if (vport->dev != dev) {
+               /* The port is assigned to a node on another eswitch. */
+               MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id_valid,
+                        true);
+               MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(vport->dev, vhca_id));
+       }
 
        return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
                                                 extack);
@@ -1160,6 +1177,29 @@ static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
        return 0;
 }
 
+static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
+                                              u32 *tc_bw)
+{
+       int i, num_tcs = esw_qos_num_tcs(esw->dev);
+
+       for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++)
+               if (tc_bw[i])
+                       return false;
+
+       return true;
+}
+
+static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
+                                                    u32 *tc_bw)
+{
+       struct mlx5_esw_sched_node *node = vport->qos.sched_node;
+       struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+
+       esw = (node && node->parent) ? node->parent->esw : esw;
+
+       return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
+}
+
 static int esw_qos_vport_update(struct mlx5_vport *vport,
                                enum sched_node_type type,
                                struct mlx5_esw_sched_node *parent,
@@ -1179,8 +1219,15 @@ static int esw_qos_vport_update(struct mlx5_vport *vport,
        if (err)
                return err;
 
-       if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type)
+       if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
                esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw);
+               if (!esw_qos_validate_unsupported_tc_bw(parent->esw,
+                                                       curr_tc_bw)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Unsupported traffic classes on the new device");
+                       return -EOPNOTSUPP;
+               }
+       }
 
        esw_qos_vport_disable(vport, extack);
 
@@ -1510,30 +1557,6 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
        return 0;
 }
 
-static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
-                                              u32 *tc_bw)
-{
-       int i, num_tcs = esw_qos_num_tcs(esw->dev);
-
-       for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
-               if (tc_bw[i])
-                       return false;
-       }
-
-       return true;
-}
-
-static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
-                                                    u32 *tc_bw)
-{
-       struct mlx5_esw_sched_node *node = vport->qos.sched_node;
-       struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
-
-       esw = (node && node->parent) ? node->parent->esw : esw;
-
-       return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
-}
-
 static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
 {
        int i;
@@ -1738,18 +1761,44 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
        return 0;
 }
 
+static int
+mlx5_esw_validate_cross_esw_scheduling(struct mlx5_eswitch *esw,
+                                      struct mlx5_esw_sched_node *parent,
+                                      struct netlink_ext_ack *extack)
+{
+       if (!parent || esw == parent->esw)
+               return 0;
+
+       if (!MLX5_CAP_QOS(esw->dev, esw_cross_esw_sched)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Cross E-Switch scheduling is not supported");
+               return -EOPNOTSUPP;
+       }
+       if (esw->dev->shd != parent->esw->dev->shd) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Cannot add vport to a parent belonging to a different device");
+               return -EOPNOTSUPP;
+       }
+       if (!mlx5_lag_is_active(esw->dev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Cross E-Switch scheduling requires LAG to be activated");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
 static int
 mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport,
                                 struct mlx5_esw_sched_node *parent,
                                 struct netlink_ext_ack *extack)
 {
        struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
-       int err = 0;
+       int err;
 
-       if (parent && parent->esw != esw) {
-               NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
-               return -EOPNOTSUPP;
-       }
+       err = mlx5_esw_validate_cross_esw_scheduling(esw, parent, extack);
+       if (err)
+               return err;
 
        if (!vport->qos.sched_node && parent) {
                enum sched_node_type type;
-- 
2.44.0


Reply via email to