Ethan, I have not worked on this since, so this is the current version.
Jarno On Apr 8, 2014, at 12:04 PM, Ethan Jackson <et...@nicira.com> wrote: > Should I review this version of the patch? Or do you have a non-RFC > version ready? > > Ethan > > On Mon, Mar 10, 2014 at 6:44 PM, Jarno Rajahalme <jrajaha...@nicira.com> > wrote: >> Intended to send this as an RFC, >> >> Jarno >> >> On Mar 10, 2014, at 6:43 PM, Jarno Rajahalme <jrajaha...@nicira.com> wrote: >> >>> This should optimize port masks for megaflows for typical port usage >>> in matches. Each subtable has it's own trie if the subtable matches >>> any of the ports bits. This trie is consulted only after failing >>> lookup to determine the number of bits that needs to be unwildcarded >>> to guarantee that any packet that should match on any of the other >>> rules will NOT match this megaflow. >>> >>> This version is not configurable and is always on. >>> >>> Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> >>> --- >>> lib/classifier.c | 90 >>> +++++++++++++++++++++++++++++++++++++++++++++++---- >>> lib/classifier.h | 2 ++ >>> tests/classifier.at | 6 ++-- >>> 3 files changed, 89 insertions(+), 9 deletions(-) >>> >>> diff --git a/lib/classifier.c b/lib/classifier.c >>> index 30a91b7..c36ccf6 100644 >>> --- a/lib/classifier.c >>> +++ b/lib/classifier.c >>> @@ -30,6 +30,10 @@ >>> >>> VLOG_DEFINE_THIS_MODULE(classifier); >>> >>> +/* Ports trie depends on both ports sharing the same ovs_be32. */ >>> +#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4) >>> +BUILD_ASSERT_DECL((offsetof(struct flow, tp_dst) / 4) == TP_PORTS_OFS32); >>> + >>> struct trie_ctx; >>> static struct cls_subtable *find_subtable(const struct classifier *, >>> const struct minimask *); >>> @@ -71,10 +75,16 @@ static void trie_init(struct classifier *, int trie_idx, >>> const struct mf_field *); >>> static unsigned int trie_lookup(const struct cls_trie *, const struct flow >>> *, >>> unsigned int *checkbits); >>> - >>> +static unsigned int trie_lookup_value(const struct trie_node *, >>> + const ovs_be32 value[], >>> + unsigned int *checkbits); >>> static void trie_destroy(struct trie_node *); >>> static void trie_insert(struct cls_trie *, const struct cls_rule *, int >>> mlen); >>> +static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix, >>> + int mlen); >>> static void trie_remove(struct cls_trie *, const struct cls_rule *, int >>> mlen); >>> +static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix, >>> + int mlen); >>> static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, >>> unsigned int nbits); >>> static bool mask_prefix_bits_set(const struct flow_wildcards *, >>> @@ -389,6 +399,23 @@ classifier_replace(struct classifier *cls, struct >>> cls_rule *rule) >>> trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]); >>> } >>> } >>> + >>> + /* Ports trie. */ >>> + if (subtable->ports_mask_len) { >>> + ovs_be32 ports, mask, masked_ports; >>> + >>> + /* We mask the value to be inserted to always have the >>> wildcarded >>> + * bits in known (zero) state, so we can include them in >>> comparison >>> + * and they will always match (== their original value does not >>> + * matter). */ >>> + ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow, >>> + TP_PORTS_OFS32); >>> + mask = minimask_get(&subtable->mask, TP_PORTS_OFS32); >>> + masked_ports = ports & mask; >>> + >>> + trie_insert_prefix(&subtable->ports_trie, &masked_ports, >>> + subtable->ports_mask_len); >>> + } >>> } else { >>> rule->partition = old_rule->partition; >>> } >>> @@ -421,6 +448,17 @@ classifier_remove(struct classifier *cls, struct >>> cls_rule *rule) >>> >>> subtable = find_subtable(cls, &rule->match.mask); >>> >>> + if (subtable->ports_mask_len) { >>> + ovs_be32 ports, mask, masked_ports; >>> + >>> + ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow, >>> + TP_PORTS_OFS32); >>> + mask = minimask_get(&subtable->mask, TP_PORTS_OFS32); >>> + masked_ports = ports & mask; >>> + >>> + trie_remove_prefix(&subtable->ports_trie, >>> + &masked_ports, subtable->ports_mask_len); >>> + } >>> for (i = 0; i < cls->n_tries; i++) { >>> if (subtable->trie_plen[i]) { >>> trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]); >>> @@ -859,6 +897,11 @@ insert_subtable(struct classifier *cls, const struct >>> minimask *mask) >>> >>> cls->tries[i].field); >>> } >>> >>> + /* Ports trie. */ >>> + subtable->ports_trie = NULL; >>> + subtable->ports_mask_len = 32 >>> + - ctz32(ntohl((OVS_FORCE ovs_be32)minimask_get(mask, >>> TP_PORTS_OFS32))); >>> + >>> return subtable; >>> } >>> >>> @@ -867,6 +910,9 @@ destroy_subtable(struct classifier *cls, struct >>> cls_subtable *subtable) >>> { >>> int i; >>> >>> + /* Port tries. */ >>> + trie_destroy(subtable->ports_trie); >>> + >>> for (i = 0; i < subtable->n_indices; i++) { >>> hindex_destroy(&subtable->indices[i]); >>> } >>> @@ -1121,6 +1167,23 @@ find_match_wc(const struct cls_subtable *subtable, >>> const struct flow *flow, >>> * but it didn't match. */ >>> rule = NULL; >>> } >>> + if (!rule && subtable->ports_mask_len) { >>> + /* Ports are always part of the final range, if any. >>> + * No match was found for the ports. Use the ports trie to figure >>> out >>> + * which ports bits to unwildcard. */ >>> + unsigned int mbits; >>> + ovs_be32 value, mask; >>> + >>> + mask = minimask_get(&subtable->mask, TP_PORTS_OFS32); >>> + value = ((ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; >>> + trie_lookup_value(subtable->ports_trie, &value, &mbits); >>> + >>> + ((ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= >>> + mask & htonl(~0 << (32 - mbits)); >>> + >>> + ofs.start = TP_PORTS_OFS32; >>> + goto range_out; >>> + } >>> out: >>> /* Must unwildcard all the fields, as they were looked at. */ >>> flow_wildcards_fold_minimask(wc, &subtable->mask); >>> @@ -1513,14 +1576,18 @@ minimatch_get_prefix(const struct minimatch *match, >>> const struct mf_field *mf) >>> static void >>> trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) >>> { >>> - const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, >>> trie->field); >>> + trie_insert_prefix(&trie->root, >>> + minimatch_get_prefix(&rule->match, trie->field), >>> mlen); >>> +} >>> + >>> +static void >>> +trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int >>> mlen) >>> +{ >>> struct trie_node *node; >>> - struct trie_node **edge; >>> int ofs = 0; >>> >>> /* Walk the tree. */ >>> - for (edge = &trie->root; >>> - (node = *edge) != NULL; >>> + for (; (node = *edge) != NULL; >>> edge = trie_next_edge(node, prefix, ofs)) { >>> unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, >>> mlen); >>> ofs += eqbits; >>> @@ -1561,16 +1628,25 @@ trie_insert(struct cls_trie *trie, const struct >>> cls_rule *rule, int mlen) >>> static void >>> trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) >>> { >>> - const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, >>> trie->field); >>> + trie_remove_prefix(&trie->root, >>> + minimatch_get_prefix(&rule->match, trie->field), >>> mlen); >>> +} >>> + >>> +/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' >>> mask >>> + * in 'rule'. */ >>> +static void >>> +trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int >>> mlen) >>> +{ >>> struct trie_node *node; >>> struct trie_node **edges[sizeof(union mf_value) * 8]; >>> int depth = 0, ofs = 0; >>> >>> /* Walk the tree. */ >>> - for (edges[depth] = &trie->root; >>> + for (edges[0] = root; >>> (node = *edges[depth]) != NULL; >>> edges[++depth] = trie_next_edge(node, prefix, ofs)) { >>> unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, >>> mlen); >>> + >>> if (eqbits < node->nbits) { >>> /* Mismatch, nothing to be removed. This should never happen, as >>> * only rules in the classifier are ever removed. */ >>> diff --git a/lib/classifier.h b/lib/classifier.h >>> index c3c1c3b..a1f4e4a 100644 >>> --- a/lib/classifier.h >>> +++ b/lib/classifier.h >>> @@ -276,6 +276,8 @@ struct cls_subtable { >>> uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */ >>> struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ >>> unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. >>> */ >>> + struct trie_node *ports_trie; /* NULL if none. */ >>> + int ports_mask_len; >>> }; >>> >>> /* Returns true if 'table' is a "catch-all" subtable that will match every >>> diff --git a/tests/classifier.at b/tests/classifier.at >>> index a46c526..96fb29b 100644 >>> --- a/tests/classifier.at >>> +++ b/tests/classifier.at >>> @@ -55,7 +55,7 @@ Datapath actions: drop >>> ]) >>> AT_CHECK([ovs-appctl ofproto/trace br0 >>> 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], >>> [0], [stdout]) >>> AT_CHECK([tail -2 stdout], [0], >>> - [Megaflow: >>> skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79 >>> + [Megaflow: >>> skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0 >>> Datapath actions: 2 >>> ]) >>> OVS_VSWITCHD_STOP >>> @@ -78,6 +78,8 @@ AT_DATA([flows.txt], [dnl >>> table=0 in_port=1 >>> priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3) >>> table=0 in_port=1 >>> priority=32,tcp,nw_dst=10.1.2.0/255.255.255.0,tp_src=79,action=output(2) >>> table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop >>> +table=0 in_port=1 >>> priority=33,tcp,nw_dst=10.1.2.15,tp_dst=8080,action=output(2) >>> +table=0 in_port=1 >>> priority=33,tcp,nw_dst=10.1.2.15,tp_dst=192,action=output(2) >>> table=0 in_port=1 priority=0,ip,action=drop >>> table=0 in_port=2 >>> priority=16,tcp,nw_dst=192.168.0.0/255.255.0.0,action=output(1) >>> table=0 in_port=2 priority=0,ip,action=drop >>> @@ -102,7 +104,7 @@ Datapath actions: drop >>> ]) >>> AT_CHECK([ovs-appctl ofproto/trace br0 >>> 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], >>> [0], [stdout]) >>> AT_CHECK([tail -2 stdout], [0], >>> - [Megaflow: >>> skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79 >>> + [Megaflow: >>> skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0 >>> Datapath actions: 3 >>> ]) >>> OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"]) >>> -- >>> 1.7.10.4 >>> >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev