ovn-northd processes the list of Port_Bindings and hashes the list of
queues per chassis. When it finds a port with qos_parameters and without
a queue_id, it allocates a free queue for the chassis that this port belongs.
The queue_id information is stored in the options field of Port_binding table.
Adds an action set_queue to the ingress table 0 of the logical flows
which will be translated to openflow set_queue by ovn-controller

ovn-controller opens the netdev corresponding to the tunnel interface's
status:tunnel_egress_iface value and configures a HTB qdisc on it. Then for
each SB port_binding that has queue_id set, it allocates a queue with the
qos_parameters of that port. It also frees up unused queues.

This patch replaces the older approach of policing

Signed-off-by: Babu Shanmugam <bscha...@redhat.com>
---
 ovn/controller/binding.c | 140 +++++++++++++++++++++++++++++++++++++++++------
 ovn/lib/actions.c        |  31 +++++++++++
 ovn/northd/ovn-northd.c  | 134 +++++++++++++++++++++++++++++++++++++++++++--
 ovn/ovn-nb.xml           |   8 +--
 ovn/ovn-sb.xml           |  31 +++++++++--
 5 files changed, 314 insertions(+), 30 deletions(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index a0d8b96..8628e3c 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -20,6 +20,7 @@
 #include "lib/hmap.h"
 #include "lib/sset.h"
 #include "lib/util.h"
+#include "lib/netdev.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
 #include "ovn/lib/ovn-sb-idl.h"
@@ -27,6 +28,15 @@
 
 VLOG_DEFINE_THIS_MODULE(binding);
 
+#define OVN_QOS_TYPE "linux-htb"
+
+struct qos_queue {
+    uint32_t queue_id;
+    uint32_t max_rate;
+    uint32_t burst;
+    struct hmap_node node;
+};
+
 void
 binding_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 {
@@ -44,16 +54,16 @@ binding_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
     ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
-    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_ingress_policing_rate);
-    ovsdb_idl_add_column(ovs_idl,
-                         &ovsrec_interface_col_ingress_policing_burst);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_status);
 }
 
 static void
-get_local_iface_ids(const struct ovsrec_bridge *br_int, struct shash *lports)
+get_local_iface_ids(const struct ovsrec_bridge *br_int, struct shash *lports,
+                    const struct ovsrec_interface **tunnel_iface)
 {
     int i;
 
+    *tunnel_iface = NULL;
     for (i = 0; i < br_int->n_ports; i++) {
         const struct ovsrec_port *port_rec = br_int->ports[i];
         const char *iface_id;
@@ -68,10 +78,16 @@ get_local_iface_ids(const struct ovsrec_bridge *br_int, 
struct shash *lports)
 
             iface_rec = port_rec->interfaces[j];
             iface_id = smap_get(&iface_rec->external_ids, "iface-id");
-            if (!iface_id) {
-                continue;
+            if (iface_id) {
+                shash_add(lports, iface_id, iface_rec);
+            }
+
+            if (!*tunnel_iface) {
+                /*Check if this is a tunnel interface*/
+                if (smap_get(&iface_rec->options, "remote_ip")) {
+                    *tunnel_iface = iface_rec;
+                }
             }
-            shash_add(lports, iface_id, iface_rec);
         }
     }
 }
@@ -136,14 +152,99 @@ add_local_datapath(struct hmap *local_datapaths,
 }
 
 static void
-update_qos(const struct ovsrec_interface *iface_rec,
-           const struct sbrec_port_binding *pb)
+get_qos_params(const struct sbrec_port_binding *pb,
+               struct hmap *queue_map)
 {
-    int rate = smap_get_int(&pb->options, "policing_rate", 0);
-    int burst = smap_get_int(&pb->options, "policing_burst", 0);
+    uint32_t max_rate = smap_get_int(&pb->options, "qos_max_rate", 0);
+    uint32_t burst = smap_get_int(&pb->options, "qos_burst", 0);
+    uint32_t queue_id = smap_get_int(&pb->options, "qdisc_queue_id", 0);
 
-    ovsrec_interface_set_ingress_policing_rate(iface_rec, MAX(0, rate));
-    ovsrec_interface_set_ingress_policing_burst(iface_rec, MAX(0, burst));
+    if ((!max_rate && !burst) || !queue_id) {
+        /*Qos is not configured for this port*/
+        return;
+    }
+
+    struct qos_queue *node = xzalloc(sizeof *node);
+    node->max_rate = max_rate;
+    node->burst = burst;
+    node->queue_id = queue_id;
+    hmap_insert(queue_map, &node->node, hash_int(queue_id, 0));
+}
+
+static void
+setup_qos(const struct ovsrec_interface *tunnel_iface, struct hmap *queue_map)
+{
+    const char *egress_iface;
+    struct netdev *netdev_phy;
+
+    egress_iface = smap_get(&tunnel_iface->status, "tunnel_egress_iface");
+    if (!egress_iface) {
+        /*Queues cannot be configured*/
+        return;
+    }
+
+    if (netdev_open(egress_iface, NULL, &netdev_phy) != 0) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_WARN_RL(&rl, "Unable to open netdev %s\n", egress_iface);
+        return;
+    }
+
+    /*Check and configure qdisc*/
+    const char *qdisc_type;
+    struct smap qdisc_details;
+
+    smap_init(&qdisc_details);
+    if (netdev_get_qos(netdev_phy, &qdisc_type, &qdisc_details) != 0 ||
+        qdisc_type[0] == '\0') {
+        /*Qos is not supported*/
+        return;
+    }
+
+    if (strcmp(qdisc_type, OVN_QOS_TYPE)) {
+        netdev_set_qos(netdev_phy, OVN_QOS_TYPE, &qdisc_details);
+    }
+
+    /*Check and delete if needed*/
+    struct netdev_queue_dump dump;
+    unsigned int queue_id;
+    struct smap queue_details;
+    struct qos_queue *sb_info;
+
+    smap_init(&queue_details);
+    netdev_queue_dump_start(&dump, netdev_phy);
+    while (netdev_queue_dump_next(&dump, &queue_id, &queue_details)) {
+        bool is_queue_needed = false;
+
+        HMAP_FOR_EACH_WITH_HASH (sb_info, node, hash_int(queue_id, 0), 
queue_map) {
+            is_queue_needed = true;
+            if (sb_info->max_rate == smap_get_int(&queue_details, "max-rate", 
0) ||
+                sb_info->burst == smap_get_int(&queue_details, "burst", 0)) {
+                /*This queue is consistent*/
+                hmap_remove(queue_map, &sb_info->node);
+                free(sb_info);
+                break;
+            }
+        }
+
+        if (!is_queue_needed) {
+            netdev_delete_queue(netdev_phy, queue_id);
+        }
+    }
+
+    if (netdev_queue_dump_done(&dump)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_WARN_RL(&rl, "netdev queue dump did not complete successfully");
+    }
+
+    /*Create/Update queues*/
+    HMAP_FOR_EACH(sb_info, node, queue_map) {
+        smap_clear(&queue_details);
+        smap_add_format(&queue_details, "max-rate", "%d", sb_info->max_rate);
+        smap_add_format(&queue_details, "burst", "%d", sb_info->burst);
+        netdev_set_queue(netdev_phy, sb_info->queue_id, &queue_details);
+    }
+    smap_destroy(&queue_details);
+    netdev_close(netdev_phy);
 }
 
 void
@@ -153,12 +254,15 @@ binding_run(struct controller_ctx *ctx, const struct 
ovsrec_bridge *br_int,
 {
     const struct sbrec_chassis *chassis_rec;
     const struct sbrec_port_binding *binding_rec;
+    const struct ovsrec_interface *tunnel_iface = NULL;
+    struct hmap qos_map;
 
     chassis_rec = get_chassis(ctx->ovnsb_idl, chassis_id);
 
+    hmap_init(&qos_map);
     struct shash lports = SHASH_INITIALIZER(&lports);
     if (br_int) {
-        get_local_iface_ids(br_int, &lports);
+        get_local_iface_ids(br_int, &lports, &tunnel_iface);
     } else {
         /* We have no integration bridge, therefore no local logical ports.
          * We'll remove our chassis from all port binding records below. */
@@ -184,8 +288,8 @@ binding_run(struct controller_ctx *ctx, const struct 
ovsrec_bridge *br_int,
                 sset_add(&all_lports, binding_rec->logical_port);
             }
             add_local_datapath(local_datapaths, binding_rec);
-            if (iface_rec && ctx->ovs_idl_txn) {
-                update_qos(iface_rec, binding_rec);
+            if (iface_rec && tunnel_iface && ctx->ovs_idl_txn) {
+                get_qos_params(binding_rec, &qos_map);
             }
             if (ctx->ovnsb_idl_txn && chassis_rec
                 && binding_rec->chassis != chassis_rec) {
@@ -221,7 +325,11 @@ binding_run(struct controller_ctx *ctx, const struct 
ovsrec_bridge *br_int,
     }
 
     update_ct_zones(&all_lports, ct_zones, ct_zone_bitmap);
+    if (tunnel_iface) {
+        setup_qos(tunnel_iface, &qos_map);
+    }
 
+    hmap_destroy(&qos_map);
     shash_destroy(&lports);
     sset_destroy(&all_lports);
 }
diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 5f0bf19..a7eb865 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -413,6 +413,35 @@ parse_put_arp_action(struct action_context *ctx)
     restore_args(ctx, args, ARRAY_SIZE(args));
 }
 
+#define QDISC_MIN_QUEUE_ID (1)
+#define QDISC_MAX_QUEUE_ID (0xf000)
+
+static void
+parse_set_queue_action(struct action_context *ctx)
+{
+    int queue_id;
+
+    if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        action_syntax_error(ctx, "expecting `('");
+        return;
+    }
+    if (!action_get_int(ctx, &queue_id)) {
+        return;
+    }
+    if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+        action_syntax_error(ctx, "expecting `)'");
+        return;
+    }
+    if (queue_id < QDISC_MIN_QUEUE_ID ||
+        queue_id > QDISC_MAX_QUEUE_ID) {
+        action_error(ctx, "queue_id for set_queue is not in valid range.");
+        return;
+    }
+
+    struct ofpact_queue *set_queue = ofpact_put_SET_QUEUE(ctx->ofpacts);
+    set_queue->queue_id = queue_id;
+}
+
 static void
 emit_ct(struct action_context *ctx, bool recirc_next, bool commit)
 {
@@ -475,6 +504,8 @@ parse_action(struct action_context *ctx)
         parse_get_arp_action(ctx);
     } else if (lexer_match_id(ctx->lexer, "put_arp")) {
         parse_put_arp_action(ctx);
+    } else if (lexer_match_id(ctx->lexer, "set_queue")) {
+        parse_set_queue_action(ctx);
     } else {
         action_syntax_error(ctx, "expecting action");
     }
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 44e9430..81c1519 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -234,6 +234,81 @@ allocate_tnlid(struct hmap *set, const char *name, 
uint32_t max,
     return 0;
 }
 
+#define QDISC_MIN_QUEUE_ID  (1)
+#define QDISC_MAX_QUEUE_ID  (0xf000)
+
+struct ovn_chassis_qdisc_queues {
+    uint32_t queue_id;
+    struct hmap_node key_node;
+};
+
+static void
+destroy_chassis_queues(struct hmap *set)
+{
+    struct ovn_chassis_qdisc_queues *node;
+    HMAP_FOR_EACH_POP (node, key_node, set) {
+        free(node);
+    }
+    hmap_destroy(set);
+}
+
+static void
+add_chassis_queue(struct hmap *set, const char *chassis_name,
+                  uint32_t queue_id)
+{
+    struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
+    node->queue_id = queue_id;
+    hmap_insert(set, &node->key_node, hash_string(chassis_name, 0));
+}
+
+static bool
+chassis_queueid_in_use(const struct hmap *set, const char *chassis,
+                       uint32_t queue_id)
+{
+    const struct ovn_chassis_qdisc_queues *node;
+    HMAP_FOR_EACH_WITH_HASH (node, key_node, hash_string(chassis, 0), set) {
+        if (node->queue_id == queue_id) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static uint32_t
+allocate_chassis_queueid(struct hmap *set, const char *chassis)
+{
+    for (uint32_t queue_id = QDISC_MIN_QUEUE_ID;
+         queue_id <= QDISC_MAX_QUEUE_ID;
+         queue_id++) {
+        if (!chassis_queueid_in_use(set, chassis, queue_id)) {
+            add_chassis_queue(set, chassis, queue_id);
+            return queue_id;
+        }
+    }
+
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+    VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis);
+    return 0;
+}
+
+static void
+free_chassis_queueid(struct hmap *set, const char * chassis, uint32_t queue_id)
+{
+    struct ovn_chassis_qdisc_queues *node;
+    HMAP_FOR_EACH_WITH_HASH (node, key_node, hash_string(chassis, 0), set) {
+        if (node->queue_id == queue_id) {
+            hmap_remove(set, &node->key_node);
+            break;
+        }
+    }
+}
+
+static inline bool
+port_has_qos_params(struct smap * opts) {
+    return (smap_get(opts, "qos_max_rate") ||
+            smap_get(opts, "qos_burst"));
+}
+
 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
  * sb->external_ids:logical-switch. */
 struct ovn_datapath {
@@ -556,6 +631,7 @@ ovn_port_allocate_key(struct ovn_datapath *od)
 static void
 join_logical_ports(struct northd_context *ctx,
                    struct hmap *datapaths, struct hmap *ports,
+                   struct hmap *chassis_qdisc_queues,
                    struct ovs_list *sb_only, struct ovs_list *nb_only,
                    struct ovs_list *both)
 {
@@ -587,6 +663,15 @@ join_logical_ports(struct northd_context *ctx,
                     }
                     op->nbs = nbs;
                     ovs_list_remove(&op->list);
+
+                    uint32_t queue_id = (uint32_t)smap_get_int(
+                                    &op->sb->options, "qdisc_queue_id", 0);
+                    if (queue_id && op->sb->chassis) {
+                        add_chassis_queue(
+                             chassis_qdisc_queues, op->sb->chassis->name,
+                             queue_id);
+                    }
+
                     ovs_list_push_back(both, &op->list);
                 } else {
                     op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
@@ -686,7 +771,8 @@ join_logical_ports(struct northd_context *ctx,
 }
 
 static void
-ovn_port_update_sbrec(const struct ovn_port *op)
+ovn_port_update_sbrec(const struct ovn_port *op,
+                      struct hmap *chassis_qdisc_queues)
 {
     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
     if (op->nbr) {
@@ -701,8 +787,31 @@ ovn_port_update_sbrec(const struct ovn_port *op)
         sbrec_port_binding_set_mac(op->sb, NULL, 0);
     } else {
         if (strcmp(op->nbs->type, "router")) {
+            uint32_t queue_id = smap_get_int(
+                    &op->sb->options, "qdisc_queue_id", 0);
+            struct smap options;
+
+            smap_clone(&options, &op->nbs->options);
+            if (op->sb->chassis && port_has_qos_params(&options)
+                && !queue_id) {
+                queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
+                                                    op->sb->chassis->name);
+            }
+            if (!port_has_qos_params(&options) && queue_id) {
+                /*Free this queue*/
+                free_chassis_queueid(chassis_qdisc_queues,
+                                     op->sb->chassis->name,
+                                     queue_id);
+                queue_id = 0;
+            }
+
+            if (queue_id) {
+                /*Only when there is a valid queue*/
+                smap_add_format(&options,
+                                "qdisc_queue_id", "%d", queue_id);
+            }
             sbrec_port_binding_set_type(op->sb, op->nbs->type);
-            sbrec_port_binding_set_options(op->sb, &op->nbs->options);
+            sbrec_port_binding_set_options(op->sb, &options);
         } else {
             sbrec_port_binding_set_type(op->sb, "patch");
 
@@ -732,14 +841,18 @@ build_ports(struct northd_context *ctx, struct hmap 
*datapaths,
             struct hmap *ports)
 {
     struct ovs_list sb_only, nb_only, both;
+    struct hmap chassis_qdisc_queues;
+
+    hmap_init(&chassis_qdisc_queues);
 
-    join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
+    join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
+                       &sb_only, &nb_only, &both);
 
     /* For logical ports that are in both databases, update the southbound
      * record based on northbound data.  Also index the in-use tunnel_keys. */
     struct ovn_port *op, *next;
     LIST_FOR_EACH_SAFE (op, next, list, &both) {
-        ovn_port_update_sbrec(op);
+        ovn_port_update_sbrec(op, &chassis_qdisc_queues);
 
         add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
         if (op->sb->tunnel_key > op->od->port_key_hint) {
@@ -755,7 +868,7 @@ build_ports(struct northd_context *ctx, struct hmap 
*datapaths,
         }
 
         op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
-        ovn_port_update_sbrec(op);
+        ovn_port_update_sbrec(op, &chassis_qdisc_queues);
 
         sbrec_port_binding_set_logical_port(op->sb, op->key);
         sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
@@ -767,6 +880,8 @@ build_ports(struct northd_context *ctx, struct hmap 
*datapaths,
         sbrec_port_binding_delete(op->sb);
         ovn_port_destroy(ports, op);
     }
+
+    destroy_chassis_queues(&chassis_qdisc_queues);
 }
 
 #define OVN_MIN_MULTICAST 32768
@@ -1472,12 +1587,19 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap 
*ports,
         }
 
         struct ds match = DS_EMPTY_INITIALIZER;
+        struct ds action = DS_EMPTY_INITIALIZER;
+
         ds_put_format(&match, "inport == %s", op->json_key);
         build_port_security_l2(
             "eth.src", op->nbs->port_security, op->nbs->n_port_security,
             &match);
+        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
+        if (queue_id) {
+            ds_put_format(&action, "set_queue(%s);", queue_id);
+        }
+        ds_put_cstr(&action, "next;");
         ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
-                      ds_cstr(&match), "next;");
+                      ds_cstr(&match), ds_cstr(&action));
         ds_destroy(&match);
 
         if (op->nbs->n_port_security) {
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index c01455d..45306dc 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -202,14 +202,14 @@
           (empty string)
         </p>
 
-        <column name="options" key="policing_rate">
+        <column name="options" key="qos_max_rate">
           If set, indicates the maximum rate for data sent from this interface,
-          in kbps. Data exceeding this rate is dropped.
+          in bit/s. The traffic will be shaped according to this limit.
         </column>
 
-        <column name="options" key="policing_burst">
+        <column name="options" key="qos_burst">
           If set, indicates the maximum burst size for data sent from this
-          interface, in kb.
+          interface, in bits.
         </column>
       </group>
     </group>
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index efd2f9a..eb6f745 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1022,6 +1022,23 @@
 
           <p><b>Example:</b> <code>put_arp(inport, arp.spa, 
arp.sha);</code></p>
         </dd>
+
+        <dt>
+          <code>set_queue(<var>queue_number</var>);</code>
+        </dt>
+
+        <dd>
+          <p>
+            <b>Parameters</b>: Queue number <var>queue_number</var>, 32-bit
+          </p>
+
+          <p>
+            This is equivalent to Openflow set_queue. queue_number should be
+            in the range of 1 to 61440
+          </p>
+
+          <p><b>Example:</b> <code>set_queue(10);</code></p>
+        </dd>
       </dl>
 
       <p>
@@ -1390,14 +1407,20 @@ tcp.flags = RST;
         (empty string)
       </p>
 
-      <column name="options" key="policing_rate">
+      <column name="options" key="qos_max_rate">
         If set, indicates the maximum rate for data sent from this interface,
-        in kbps. Data exceeding this rate is dropped.
+        in bit/s. The traffic will be shaped according to this limit.
       </column>
 
-      <column name="options" key="policing_burst">
+      <column name="options" key="qos_burst">
         If set, indicates the maximum burst size for data sent from this
-        interface, in kb.
+        interface, in bits.
+      </column>
+
+      <column name="options" key="qdisc_queue_id">
+        Indicates the queue number on the physical device. This is same as the
+        queue_id used in OpenFlow in struct ofp_action_enqueue. Value should
+        be in the range of 1 to 61440.
       </column>
     </group>
 
-- 
2.5.0

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to