This should optimize port masks for megaflows for typical port usage
in matches.  Each subtable has it's own trie if the subtable matches
any of the ports bits.  This trie is consulted only after failing
lookup to determine the number of bits that needs to be unwildcarded
to guarantee that any packet that should match on any of the other
rules will NOT match this megaflow.

This version is not configurable and is always on.

Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>
---
v2: Rebase to master.

 lib/classifier.c    |   90 +++++++++++++++++++++++++++++++++++++++++++++++----
 lib/classifier.h    |    2 ++
 tests/classifier.at |    6 ++--
 3 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 55ca5ea..ef20bd1 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -30,6 +30,10 @@
 
 VLOG_DEFINE_THIS_MODULE(classifier);
 
+/* Ports trie depends on both ports sharing the same ovs_be32. */
+#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
+BUILD_ASSERT_DECL((offsetof(struct flow, tp_dst) / 4) == TP_PORTS_OFS32);
+
 struct trie_ctx;
 static struct cls_subtable *find_subtable(const struct classifier *,
                                           const struct minimask *);
@@ -71,10 +75,16 @@ static void trie_init(struct classifier *, int trie_idx,
                       const struct mf_field *);
 static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
                                 unsigned int *checkbits);
-
+static unsigned int trie_lookup_value(const struct trie_node *,
+                                      const ovs_be32 value[],
+                                      unsigned int *checkbits);
 static void trie_destroy(struct trie_node *);
 static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
+static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix,
+                               int mlen);
 static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
+static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix,
+                               int mlen);
 static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
                                  unsigned int nbits);
 static bool mask_prefix_bits_set(const struct flow_wildcards *,
@@ -389,6 +399,23 @@ classifier_replace(struct classifier *cls, struct cls_rule 
*rule)
                 trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
             }
         }
+
+        /* Ports trie. */
+        if (subtable->ports_mask_len) {
+            ovs_be32 ports, mask, masked_ports;
+
+            /* We mask the value to be inserted to always have the wildcarded
+             * bits in known (zero) state, so we can include them in comparison
+             * and they will always match (== their original value does not
+             * matter). */
+            ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow,
+                                                     TP_PORTS_OFS32);
+            mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
+            masked_ports = ports & mask;
+
+            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
+                               subtable->ports_mask_len);
+        }
     } else {
         rule->partition = old_rule->partition;
     }
@@ -421,6 +448,17 @@ classifier_remove(struct classifier *cls, struct cls_rule 
*rule)
 
     subtable = find_subtable(cls, &rule->match.mask);
 
+    if (subtable->ports_mask_len) {
+        ovs_be32 ports, mask, masked_ports;
+
+        ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow,
+                                                 TP_PORTS_OFS32);
+        mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
+        masked_ports = ports & mask;
+
+        trie_remove_prefix(&subtable->ports_trie,
+                           &masked_ports, subtable->ports_mask_len);
+    }
     for (i = 0; i < cls->n_tries; i++) {
         if (subtable->trie_plen[i]) {
             trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
@@ -859,6 +897,11 @@ insert_subtable(struct classifier *cls, const struct 
minimask *mask)
                                                          cls->tries[i].field);
     }
 
+    /* Ports trie. */
+    subtable->ports_trie = NULL;
+    subtable->ports_mask_len = 32
+        - ctz32(ntohl((OVS_FORCE ovs_be32)minimask_get(mask, TP_PORTS_OFS32)));
+
     return subtable;
 }
 
@@ -867,6 +910,9 @@ destroy_subtable(struct classifier *cls, struct 
cls_subtable *subtable)
 {
     int i;
 
+    /* Port tries. */
+    trie_destroy(subtable->ports_trie);
+
     for (i = 0; i < subtable->n_indices; i++) {
         hindex_destroy(&subtable->indices[i]);
     }
@@ -1121,6 +1167,23 @@ find_match_wc(const struct cls_subtable *subtable, const 
struct flow *flow,
          * but it didn't match. */
         rule = NULL;
     }
+    if (!rule && subtable->ports_mask_len) {
+        /* Ports are always part of the final range, if any.
+         * No match was found for the ports.  Use the ports trie to figure out
+         * which ports bits to unwildcard. */
+        unsigned int mbits;
+        ovs_be32 value, mask;
+
+        mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
+        value = ((ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
+        trie_lookup_value(subtable->ports_trie, &value, &mbits);
+
+        ((ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
+            mask & htonl(~0 << (32 - mbits));
+
+        ofs.start = TP_PORTS_OFS32;
+        goto range_out;
+    }
  out:
     /* Must unwildcard all the fields, as they were looked at. */
     flow_wildcards_fold_minimask(wc, &subtable->mask);
@@ -1513,14 +1576,18 @@ minimatch_get_prefix(const struct minimatch *match, 
const struct mf_field *mf)
 static void
 trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
 {
-    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
+    trie_insert_prefix(&trie->root,
+                       minimatch_get_prefix(&rule->match, trie->field), mlen);
+}
+
+static void
+trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen)
+{
     struct trie_node *node;
-    struct trie_node **edge;
     int ofs = 0;
 
     /* Walk the tree. */
-    for (edge = &trie->root;
-         (node = *edge) != NULL;
+    for (; (node = *edge) != NULL;
          edge = trie_next_edge(node, prefix, ofs)) {
         unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
         ofs += eqbits;
@@ -1561,16 +1628,25 @@ trie_insert(struct cls_trie *trie, const struct 
cls_rule *rule, int mlen)
 static void
 trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
 {
-    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
+    trie_remove_prefix(&trie->root,
+                       minimatch_get_prefix(&rule->match, trie->field), mlen);
+}
+
+/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
+ * in 'rule'. */
+static void
+trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen)
+{
     struct trie_node *node;
     struct trie_node **edges[sizeof(union mf_value) * 8];
     int depth = 0, ofs = 0;
 
     /* Walk the tree. */
-    for (edges[depth] = &trie->root;
+    for (edges[0] = root;
          (node = *edges[depth]) != NULL;
          edges[++depth] = trie_next_edge(node, prefix, ofs)) {
         unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
+
         if (eqbits < node->nbits) {
             /* Mismatch, nothing to be removed.  This should never happen, as
              * only rules in the classifier are ever removed. */
diff --git a/lib/classifier.h b/lib/classifier.h
index c3c1c3b..a1f4e4a 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -276,6 +276,8 @@ struct cls_subtable {
     uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
     struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
     unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'. */
+    struct trie_node *ports_trie; /* NULL if none. */
+    int ports_mask_len;
 };
 
 /* Returns true if 'table' is a "catch-all" subtable that will match every
diff --git a/tests/classifier.at b/tests/classifier.at
index b6c9352..5338a11 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -55,7 +55,7 @@ Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 
'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'],
 [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: 
recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
+  [Megaflow: 
recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0
 Datapath actions: 2
 ])
 OVS_VSWITCHD_STOP
@@ -78,6 +78,8 @@ AT_DATA([flows.txt], [dnl
 table=0 in_port=1 priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3)
 table=0 in_port=1 
priority=32,tcp,nw_dst=10.1.2.0/255.255.255.0,tp_src=79,action=output(2)
 table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop
+table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=8080,action=output(2)
+table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=192,action=output(2)
 table=0 in_port=1 priority=0,ip,action=drop
 table=0 in_port=2 
priority=16,tcp,nw_dst=192.168.0.0/255.255.0.0,action=output(1)
 table=0 in_port=2 priority=0,ip,action=drop
@@ -102,7 +104,7 @@ Datapath actions: drop
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 
'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'],
 [0], [stdout])
 AT_CHECK([tail -2 stdout], [0],
-  [Megaflow: 
recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
+  [Megaflow: 
recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0
 Datapath actions: 3
 ])
 OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
-- 
1.7.10.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to