I sent some comments (mostly about comments it seems) earlier you may want to revisit (or not):
http://openvswitch.org/pipermail/dev/2013-May/027297.html Jarno On Jun 28, 2013, at 1:30 , ext Ben Pfaff wrote: > We have a controller that puts many rules with different metadata values > into the flow table, where metadata is used (by "resubmit"s) to distinguish > stages in a pipeline. Thus, any given flow only needs to be hashed into > classifier "cls_table"s that contain a match for the flow's metadata value. > This commit optimizes the classifier lookup by (probabilistically) skipping > the "cls_table"s that can't possibly match. > > (The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field, > which is a 64-bit field similar in purpose to the "registers" defined by > Open vSwitch.) > > This speeds up flow setup performance with the controller in question by > about 19%. > > Bug #14282. > Signed-off-by: Ben Pfaff <b...@nicira.com> > --- > lib/classifier.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++---- > lib/classifier.h | 74 +++++++++++++++++++++++++++++++--- > lib/flow.h | 27 ++++++++++++ > lib/tag.h | 8 +++- > 4 files changed, 209 insertions(+), 16 deletions(-) > > diff --git a/lib/classifier.c b/lib/classifier.c > index bd202f6..0af6e17 100644 > --- a/lib/classifier.c > +++ b/lib/classifier.c > @@ -41,7 +41,7 @@ static void update_tables_after_removal(struct classifier *, > unsigned int del_priority); > > static struct cls_rule *find_match(const struct cls_table *, > - const struct flow *); > + const struct flow *, tag_type tags); > static struct cls_rule *find_equal(struct cls_table *, > const struct miniflow *, uint32_t hash); > static struct cls_rule *insert_rule(struct classifier *, > @@ -143,6 +143,7 @@ classifier_init(struct classifier *cls) > cls->n_rules = 0; > hmap_init(&cls->tables); > list_init(&cls->tables_priority); > + hmap_init(&cls->partitions); > } > > /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the > @@ -151,12 +152,20 @@ void > classifier_destroy(struct classifier *cls) > { > if (cls) { > + struct cls_table *partition, *next_partition; > struct cls_table *table, *next_table; > > HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) { > destroy_table(cls, table); > } > hmap_destroy(&cls->tables); > + > + HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, > + &cls->partitions) { > + hmap_remove(&cls->partitions, &partition->hmap_node); > + free(partition); > + } > + hmap_destroy(&cls->partitions); > } > } > > @@ -174,6 +183,46 @@ classifier_count(const struct classifier *cls) > return cls->n_rules; > } > > +static uint32_t > +hash_metadata(ovs_be64 metadata_) > +{ > + uint64_t metadata = (OVS_FORCE uint64_t) metadata_; > + return hash_2words(metadata, metadata >> 32); > +} > + > +static struct cls_partition * > +find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t > hash) > +{ > + struct cls_partition *partition; > + > + HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) { > + if (partition->metadata == metadata) { > + return partition; > + } > + } > + > + return NULL; > +} > + > +static struct cls_partition * > +create_partition(struct classifier *cls, struct cls_table *table, > + ovs_be64 metadata) > +{ > + uint32_t hash = hash_metadata(metadata); > + struct cls_partition *partition = find_partition(cls, metadata, hash); > + if (partition) { > + partition->n_refs++; > + partition->tags |= table->tag; > + } else { > + partition = xmalloc(sizeof *partition); > + partition->n_refs = 1; > + partition->tags = table->tag; > + partition->metadata = metadata; > + hmap_insert(&cls->partitions, &partition->hmap_node, hash); > + } > + return partition; > +} > + > /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller > * must not modify or free it. > * > @@ -200,8 +249,17 @@ classifier_replace(struct classifier *cls, struct > cls_rule *rule) > > old_rule = insert_rule(cls, table, rule); > if (!old_rule) { > + if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) { > + ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow); > + rule->partition = create_partition(cls, table, metadata); > + } else { > + rule->partition = NULL; > + } > + > table->n_table_rules++; > cls->n_rules++; > + } else { > + rule->partition = old_rule->partition; > } > return old_rule; > } > @@ -225,6 +283,7 @@ classifier_insert(struct classifier *cls, struct cls_rule > *rule) > void > classifier_remove(struct classifier *cls, struct cls_rule *rule) > { > + struct cls_partition *partition; > struct cls_rule *head; > struct cls_table *table; > > @@ -242,6 +301,12 @@ classifier_remove(struct classifier *cls, struct > cls_rule *rule) > hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node); > } > > + partition = rule->partition; > + if (partition && --partition->n_refs == 0) { > + hmap_remove(&cls->partitions, &partition->hmap_node); > + free(partition); > + } > + > if (--table->n_table_rules == 0) { > destroy_table(cls, table); > } else { > @@ -262,12 +327,38 @@ struct cls_rule * > classifier_lookup(const struct classifier *cls, const struct flow *flow, > struct flow_wildcards *wc) > { > + const struct cls_partition *partition; > struct cls_table *table; > struct cls_rule *best; > + tag_type tags; > + > + /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, > then > + * 'flow' cannot possibly match in 'table': > + * > + * - If flow->metadata maps to a given 'partition', then we can use > + * 'tags' for 'partition->tags'. > + * > + * - If flow->metadata has no partition, then no rule in 'cls' has an > + * exact-match for flow->metadata. That means that we don't need > to > + * search any table that includes flow->metadata in its mask. > + * > + * In either case, we always need to search any cls_tables that do not > + * include flow->metadata in its mask. One way to do that would be to > + * check the "cls_table"s explicitly for that, but that would require an > + * extra branch per table. Instead, we mark such a cls_table's 'tags' as > + * TAG_ALL and make sure that 'tags' is never empty. This means that > + * 'tags' always intersects such a cls_table's 'tags', so we don't need a > + * special case. > + */ > + partition = (hmap_is_empty(&cls->partitions) > + ? NULL > + : find_partition(cls, flow->metadata, > + hash_metadata(flow->metadata))); > + tags = partition ? partition->tags : TAG_ARBITRARY; > > best = NULL; > LIST_FOR_EACH (table, list_node, &cls->tables_priority) { > - struct cls_rule *rule = find_match(table, flow); > + struct cls_rule *rule = find_match(table, flow, tags); > > if (wc) { > flow_wildcards_fold_minimask(wc, &table->mask); > @@ -280,7 +371,7 @@ classifier_lookup(const struct classifier *cls, const > struct flow *flow, > * can not find anything better. */ > return best; > } > - rule = find_match(table, flow); > + rule = find_match(table, flow, tags); > if (wc) { > flow_wildcards_fold_minimask(wc, &table->mask); > } > @@ -536,6 +627,7 @@ find_table(const struct classifier *cls, const struct > minimask *mask) > static struct cls_table * > insert_table(struct classifier *cls, const struct minimask *mask) > { > + uint32_t hash = minimask_hash(mask, 0); > struct cls_table *table; > > table = xzalloc(sizeof *table); > @@ -543,6 +635,9 @@ insert_table(struct classifier *cls, const struct > minimask *mask) > minimask_clone(&table->mask, mask); > hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0)); > list_push_back(&cls->tables_priority, &table->list_node); > + table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX > + ? tag_create_deterministic(hash) > + : TAG_ALL); > > return table; > } > @@ -648,14 +743,17 @@ update_tables_after_removal(struct classifier *cls, > struct cls_table *table, > } > > static struct cls_rule * > -find_match(const struct cls_table *table, const struct flow *flow) > +find_match(const struct cls_table *table, const struct flow *flow, > + tag_type tags) > { > - uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0); > - struct cls_rule *rule; > + if (tag_intersects(tags, table->tag)) { > + uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0); > + struct cls_rule *rule; > > - HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) { > - if (minimatch_matches_flow(&rule->match, flow)) { > - return rule; > + HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) { > + if (minimatch_matches_flow(&rule->match, flow)) { > + return rule; > + } > } > } > > diff --git a/lib/classifier.h b/lib/classifier.h > index fdc3af7..07a2929 100644 > --- a/lib/classifier.h > +++ b/lib/classifier.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. > + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -19,17 +19,66 @@ > > /* Flow classifier. > * > - * A classifier is a "struct classifier", > - * a hash map from a set of wildcards to a "struct cls_table", > - * a hash map from fixed field values to "struct cls_rule", > - * which can contain a list of otherwise identical rules > - * with lower priorities. > + * Basic Design > + * ============ > + * > + * Suppose that all the rules in a classifier had the same form. For > example, > + * suppose that they all matched on the source and destination Ethernet > address > + * and wildcarded all the other fields. Then the obvious way to implement a > + * classifier would be a hash table on the source and destination Ethernet > + * addresses. If new classification rules came along with a different form, > + * you could add a second hash table that hashed on the fields matched in > those > + * rules. With two hash tables, you look up a given flow in each hash table. > + * If there are no matches, the classifier didn't contain a match; if you > find > + * a match in one of them, that's the result; if you find a match in both of > + * them, then the result is the rule with the higher priority. > + * > + * This is how the classifier works. In a "struct classifier", each form of > + * "struct cls_rule" present (based on its ->match.mask) goes into a separate > + * "struct cls_table". A lookup does a hash lookup in every "struct > cls_table" > + * in the classifier and returns the highest-priority match that it finds. > + * > + * One detail: a classifier can contain multiple rules that are identical > other > + * than their priority. When this happens, only the highest priority rule > out > + * of a group of otherwise identical rules is stored directly in the "struct > + * cls_table", with the other almost-identical rules chained off a linked > list > + * inside that highest-priority rule. > + * > + * > + * Partitioning > + * ============ > + * > + * Suppose that a given classifier is being used to handle multiple stages > in a > + * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field > + * named "metadata") distinguishing between the different stages. For > example, > + * metadata value 1 might identify ingress rules, metadata value 2 might > + * identify ACLs, and metadata value 3 might identify egress rules. Such a > + * classifier is essentially partitioned into multiple sub-classifiers on the > + * basis of the metadata value. > + * > + * The classifier has a special optimization to speed up matching in this > + * scenario: > + * > + * - Each cls_table that matches on metadata gets a random tag (see > tag.h). > + * > + * - For each metadata value matched by any cls_rule, the classifier > + * constructs a "struct cls_partition" indexed by the metadata value. > + * The cls_partition has a 'tags' member whose value is the bitwise-OR > of > + * the tags of each cls_table that contains any rule that matches on > the > + * cls_partition's metadata value. > + * > + * Thus, a flow lookup can start by looking up the partition associated with > + * the flow's metadata, and then skip over any cls_table whose 'tag' does not > + * intersect the partition's 'tags'. (The flow must also be looked up in any > + * cls_table that doesn't match on metadata. We handle that by giving any > such > + * cls_table TAG_ALL as its 'tags' so that it matches any tag.) > */ > > #include "flow.h" > #include "hmap.h" > #include "list.h" > #include "match.h" > +#include "tag.h" > #include "openflow/nicira-ext.h" > #include "openflow/openflow.h" > > @@ -42,6 +91,7 @@ struct classifier { > int n_rules; /* Total number of rules. */ > struct hmap tables; /* Contains "struct cls_table"s. */ > struct list tables_priority; /* Tables in descending priority order */ > + struct hmap partitions; /* Contains "struct cls_partition"s. */ > }; > > /* A set of rules that all have the same fields wildcarded. */ > @@ -53,6 +103,7 @@ struct cls_table { > int n_table_rules; /* Number of rules, including duplicates. */ > unsigned int max_priority; /* Max priority of any rule in the table. */ > unsigned int max_count; /* Count of max_priority rules. */ > + tag_type tag; /* Tag generated from mask for partitioning. > */ > }; > > /* Returns true if 'table' is a "catch-all" table that will match every > @@ -69,6 +120,17 @@ struct cls_rule { > struct list list; /* List of identical, lower-priority rules. */ > struct minimatch match; /* Matching rule. */ > unsigned int priority; /* Larger numbers are higher priorities. */ > + struct cls_partition *partition; > +}; > + > +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ > metadata > + * field) with tags for the "cls_table"s that contain rules that match that > + * metadata value. */ > +struct cls_partition { > + struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. > */ > + ovs_be64 metadata; /* metadata value for this partition. */ > + unsigned int n_refs; /* # of flows that refer to this partition. > */ > + tag_type tags; /* OR of each included flow's cls_table tag. > */ > }; > > void cls_rule_init(struct cls_rule *, const struct match *, > diff --git a/lib/flow.h b/lib/flow.h > index 2c027ed..89c52b1 100644 > --- a/lib/flow.h > +++ b/lib/flow.h > @@ -21,6 +21,7 @@ > #include <stdbool.h> > #include <stdint.h> > #include <string.h> > +#include "byte-order.h" > #include "openflow/nicira-ext.h" > #include "openflow/openflow.h" > #include "hash.h" > @@ -327,6 +328,7 @@ void miniflow_expand(const struct miniflow *, struct flow > *); > > uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs); > uint16_t miniflow_get_vid(const struct miniflow *); > +static inline ovs_be64 miniflow_get_metadata(const struct miniflow *); > > bool miniflow_equal(const struct miniflow *a, const struct miniflow *b); > bool miniflow_equal_in_minimask(const struct miniflow *a, > @@ -359,11 +361,36 @@ void minimask_expand(const struct minimask *, struct > flow_wildcards *); > > uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs); > uint16_t minimask_get_vid_mask(const struct minimask *); > +static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *); > > bool minimask_equal(const struct minimask *a, const struct minimask *b); > uint32_t minimask_hash(const struct minimask *, uint32_t basis); > > bool minimask_has_extra(const struct minimask *, const struct minimask *); > bool minimask_is_catchall(const struct minimask *); > + > +/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */ > +static inline ovs_be64 > +miniflow_get_metadata(const struct miniflow *flow) > +{ > + enum { MD_OFS = offsetof(struct flow, metadata) }; > + BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0); > + ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4); > + ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1); > + > + return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo)); > +} > + > +/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'. > + * > + * The return value is all-1-bits if 'mask' matches on the whole value of the > + * metadata field, all-0-bits if 'mask' entirely wildcards the metadata > field, > + * or some other value if the metadata field is partially matched, partially > + * wildcarded. */ > +static inline ovs_be64 > +minimask_get_metadata_mask(const struct minimask *mask) > +{ > + return miniflow_get_metadata(&mask->masks); > +} > > #endif /* flow.h */ > diff --git a/lib/tag.h b/lib/tag.h > index 9d6b4aa..6fec24b 100644 > --- a/lib/tag.h > +++ b/lib/tag.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2008, 2011, 2012 Nicira, Inc. > + * Copyright (c) 2008, 2011, 2012, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -68,6 +68,12 @@ > /* Represents a tag, or the combination of 0 or more tags. */ > typedef uint32_t tag_type; > > +/* A 'tag_type' value that intersects every tag. */ > +#define TAG_ALL UINT32_MAX > + > +/* An arbitrary tag. */ > +#define TAG_ARBITRARY UINT32_C(3) > + > tag_type tag_create_random(void); > tag_type tag_create_deterministic(uint32_t seed); > static inline bool tag_intersects(tag_type, tag_type); > -- > 1.7.2.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev