Thanks for the reminder. They all look like good comments to me, but I'm reposting the series with the goal of getting some performance measurements. (The 19% in the commit log is obsolete.) If the performance measurements are good, I'll revise it.
I'm going to repost the series again in a moment. I'll mention this in the commit log this time. On Fri, Jun 28, 2013 at 07:49:49AM +0000, Rajahalme, Jarno (NSN - FI/Espoo) wrote: > I sent some comments (mostly about comments it seems) earlier you may want to > revisit (or not): > > http://openvswitch.org/pipermail/dev/2013-May/027297.html > > Jarno > > On Jun 28, 2013, at 1:30 , ext Ben Pfaff wrote: > > > We have a controller that puts many rules with different metadata values > > into the flow table, where metadata is used (by "resubmit"s) to distinguish > > stages in a pipeline. Thus, any given flow only needs to be hashed into > > classifier "cls_table"s that contain a match for the flow's metadata value. > > This commit optimizes the classifier lookup by (probabilistically) skipping > > the "cls_table"s that can't possibly match. > > > > (The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field, > > which is a 64-bit field similar in purpose to the "registers" defined by > > Open vSwitch.) > > > > This speeds up flow setup performance with the controller in question by > > about 19%. > > > > Bug #14282. > > Signed-off-by: Ben Pfaff <b...@nicira.com> > > --- > > lib/classifier.c | 116 > > +++++++++++++++++++++++++++++++++++++++++++++++++---- > > lib/classifier.h | 74 +++++++++++++++++++++++++++++++--- > > lib/flow.h | 27 ++++++++++++ > > lib/tag.h | 8 +++- > > 4 files changed, 209 insertions(+), 16 deletions(-) > > > > diff --git a/lib/classifier.c b/lib/classifier.c > > index bd202f6..0af6e17 100644 > > --- a/lib/classifier.c > > +++ b/lib/classifier.c > > @@ -41,7 +41,7 @@ static void update_tables_after_removal(struct classifier > > *, > > unsigned int del_priority); > > > > static struct cls_rule *find_match(const struct cls_table *, > > - const struct flow *); > > + const struct flow *, tag_type tags); > > static struct cls_rule *find_equal(struct cls_table *, > > const struct miniflow *, uint32_t hash); > > static struct cls_rule *insert_rule(struct classifier *, > > @@ -143,6 +143,7 @@ classifier_init(struct classifier *cls) > > cls->n_rules = 0; > > hmap_init(&cls->tables); > > list_init(&cls->tables_priority); > > + hmap_init(&cls->partitions); > > } > > > > /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the > > @@ -151,12 +152,20 @@ void > > classifier_destroy(struct classifier *cls) > > { > > if (cls) { > > + struct cls_table *partition, *next_partition; > > struct cls_table *table, *next_table; > > > > HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) { > > destroy_table(cls, table); > > } > > hmap_destroy(&cls->tables); > > + > > + HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, > > + &cls->partitions) { > > + hmap_remove(&cls->partitions, &partition->hmap_node); > > + free(partition); > > + } > > + hmap_destroy(&cls->partitions); > > } > > } > > > > @@ -174,6 +183,46 @@ classifier_count(const struct classifier *cls) > > return cls->n_rules; > > } > > > > +static uint32_t > > +hash_metadata(ovs_be64 metadata_) > > +{ > > + uint64_t metadata = (OVS_FORCE uint64_t) metadata_; > > + return hash_2words(metadata, metadata >> 32); > > +} > > + > > +static struct cls_partition * > > +find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t > > hash) > > +{ > > + struct cls_partition *partition; > > + > > + HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) > > { > > + if (partition->metadata == metadata) { > > + return partition; > > + } > > + } > > + > > + return NULL; > > +} > > + > > +static struct cls_partition * > > +create_partition(struct classifier *cls, struct cls_table *table, > > + ovs_be64 metadata) > > +{ > > + uint32_t hash = hash_metadata(metadata); > > + struct cls_partition *partition = find_partition(cls, metadata, hash); > > + if (partition) { > > + partition->n_refs++; > > + partition->tags |= table->tag; > > + } else { > > + partition = xmalloc(sizeof *partition); > > + partition->n_refs = 1; > > + partition->tags = table->tag; > > + partition->metadata = metadata; > > + hmap_insert(&cls->partitions, &partition->hmap_node, hash); > > + } > > + return partition; > > +} > > + > > /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the > > caller > > * must not modify or free it. > > * > > @@ -200,8 +249,17 @@ classifier_replace(struct classifier *cls, struct > > cls_rule *rule) > > > > old_rule = insert_rule(cls, table, rule); > > if (!old_rule) { > > + if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) > > { > > + ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow); > > + rule->partition = create_partition(cls, table, metadata); > > + } else { > > + rule->partition = NULL; > > + } > > + > > table->n_table_rules++; > > cls->n_rules++; > > + } else { > > + rule->partition = old_rule->partition; > > } > > return old_rule; > > } > > @@ -225,6 +283,7 @@ classifier_insert(struct classifier *cls, struct > > cls_rule *rule) > > void > > classifier_remove(struct classifier *cls, struct cls_rule *rule) > > { > > + struct cls_partition *partition; > > struct cls_rule *head; > > struct cls_table *table; > > > > @@ -242,6 +301,12 @@ classifier_remove(struct classifier *cls, struct > > cls_rule *rule) > > hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node); > > } > > > > + partition = rule->partition; > > + if (partition && --partition->n_refs == 0) { > > + hmap_remove(&cls->partitions, &partition->hmap_node); > > + free(partition); > > + } > > + > > if (--table->n_table_rules == 0) { > > destroy_table(cls, table); > > } else { > > @@ -262,12 +327,38 @@ struct cls_rule * > > classifier_lookup(const struct classifier *cls, const struct flow *flow, > > struct flow_wildcards *wc) > > { > > + const struct cls_partition *partition; > > struct cls_table *table; > > struct cls_rule *best; > > + tag_type tags; > > + > > + /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, > > then > > + * 'flow' cannot possibly match in 'table': > > + * > > + * - If flow->metadata maps to a given 'partition', then we can use > > + * 'tags' for 'partition->tags'. > > + * > > + * - If flow->metadata has no partition, then no rule in 'cls' has > > an > > + * exact-match for flow->metadata. That means that we don't > > need to > > + * search any table that includes flow->metadata in its mask. > > + * > > + * In either case, we always need to search any cls_tables that do not > > + * include flow->metadata in its mask. One way to do that would be to > > + * check the "cls_table"s explicitly for that, but that would require > > an > > + * extra branch per table. Instead, we mark such a cls_table's 'tags' > > as > > + * TAG_ALL and make sure that 'tags' is never empty. This means that > > + * 'tags' always intersects such a cls_table's 'tags', so we don't > > need a > > + * special case. > > + */ > > + partition = (hmap_is_empty(&cls->partitions) > > + ? NULL > > + : find_partition(cls, flow->metadata, > > + hash_metadata(flow->metadata))); > > + tags = partition ? partition->tags : TAG_ARBITRARY; > > > > best = NULL; > > LIST_FOR_EACH (table, list_node, &cls->tables_priority) { > > - struct cls_rule *rule = find_match(table, flow); > > + struct cls_rule *rule = find_match(table, flow, tags); > > > > if (wc) { > > flow_wildcards_fold_minimask(wc, &table->mask); > > @@ -280,7 +371,7 @@ classifier_lookup(const struct classifier *cls, const > > struct flow *flow, > > * can not find anything better. */ > > return best; > > } > > - rule = find_match(table, flow); > > + rule = find_match(table, flow, tags); > > if (wc) { > > flow_wildcards_fold_minimask(wc, &table->mask); > > } > > @@ -536,6 +627,7 @@ find_table(const struct classifier *cls, const struct > > minimask *mask) > > static struct cls_table * > > insert_table(struct classifier *cls, const struct minimask *mask) > > { > > + uint32_t hash = minimask_hash(mask, 0); > > struct cls_table *table; > > > > table = xzalloc(sizeof *table); > > @@ -543,6 +635,9 @@ insert_table(struct classifier *cls, const struct > > minimask *mask) > > minimask_clone(&table->mask, mask); > > hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0)); > > list_push_back(&cls->tables_priority, &table->list_node); > > + table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX > > + ? tag_create_deterministic(hash) > > + : TAG_ALL); > > > > return table; > > } > > @@ -648,14 +743,17 @@ update_tables_after_removal(struct classifier *cls, > > struct cls_table *table, > > } > > > > static struct cls_rule * > > -find_match(const struct cls_table *table, const struct flow *flow) > > +find_match(const struct cls_table *table, const struct flow *flow, > > + tag_type tags) > > { > > - uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0); > > - struct cls_rule *rule; > > + if (tag_intersects(tags, table->tag)) { > > + uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0); > > + struct cls_rule *rule; > > > > - HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) { > > - if (minimatch_matches_flow(&rule->match, flow)) { > > - return rule; > > + HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) { > > + if (minimatch_matches_flow(&rule->match, flow)) { > > + return rule; > > + } > > } > > } > > > > diff --git a/lib/classifier.h b/lib/classifier.h > > index fdc3af7..07a2929 100644 > > --- a/lib/classifier.h > > +++ b/lib/classifier.h > > @@ -1,5 +1,5 @@ > > /* > > - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. > > + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. > > * > > * Licensed under the Apache License, Version 2.0 (the "License"); > > * you may not use this file except in compliance with the License. > > @@ -19,17 +19,66 @@ > > > > /* Flow classifier. > > * > > - * A classifier is a "struct classifier", > > - * a hash map from a set of wildcards to a "struct cls_table", > > - * a hash map from fixed field values to "struct cls_rule", > > - * which can contain a list of otherwise identical > > rules > > - * with lower priorities. > > + * Basic Design > > + * ============ > > + * > > + * Suppose that all the rules in a classifier had the same form. For > > example, > > + * suppose that they all matched on the source and destination Ethernet > > address > > + * and wildcarded all the other fields. Then the obvious way to implement > > a > > + * classifier would be a hash table on the source and destination Ethernet > > + * addresses. If new classification rules came along with a different > > form, > > + * you could add a second hash table that hashed on the fields matched in > > those > > + * rules. With two hash tables, you look up a given flow in each hash > > table. > > + * If there are no matches, the classifier didn't contain a match; if you > > find > > + * a match in one of them, that's the result; if you find a match in both > > of > > + * them, then the result is the rule with the higher priority. > > + * > > + * This is how the classifier works. In a "struct classifier", each form > > of > > + * "struct cls_rule" present (based on its ->match.mask) goes into a > > separate > > + * "struct cls_table". A lookup does a hash lookup in every "struct > > cls_table" > > + * in the classifier and returns the highest-priority match that it finds. > > + * > > + * One detail: a classifier can contain multiple rules that are identical > > other > > + * than their priority. When this happens, only the highest priority rule > > out > > + * of a group of otherwise identical rules is stored directly in the > > "struct > > + * cls_table", with the other almost-identical rules chained off a linked > > list > > + * inside that highest-priority rule. > > + * > > + * > > + * Partitioning > > + * ============ > > + * > > + * Suppose that a given classifier is being used to handle multiple stages > > in a > > + * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ > > field > > + * named "metadata") distinguishing between the different stages. For > > example, > > + * metadata value 1 might identify ingress rules, metadata value 2 might > > + * identify ACLs, and metadata value 3 might identify egress rules. Such a > > + * classifier is essentially partitioned into multiple sub-classifiers on > > the > > + * basis of the metadata value. > > + * > > + * The classifier has a special optimization to speed up matching in this > > + * scenario: > > + * > > + * - Each cls_table that matches on metadata gets a random tag (see > > tag.h). > > + * > > + * - For each metadata value matched by any cls_rule, the classifier > > + * constructs a "struct cls_partition" indexed by the metadata value. > > + * The cls_partition has a 'tags' member whose value is the > > bitwise-OR of > > + * the tags of each cls_table that contains any rule that matches on > > the > > + * cls_partition's metadata value. > > + * > > + * Thus, a flow lookup can start by looking up the partition associated > > with > > + * the flow's metadata, and then skip over any cls_table whose 'tag' does > > not > > + * intersect the partition's 'tags'. (The flow must also be looked up in > > any > > + * cls_table that doesn't match on metadata. We handle that by giving any > > such > > + * cls_table TAG_ALL as its 'tags' so that it matches any tag.) > > */ > > > > #include "flow.h" > > #include "hmap.h" > > #include "list.h" > > #include "match.h" > > +#include "tag.h" > > #include "openflow/nicira-ext.h" > > #include "openflow/openflow.h" > > > > @@ -42,6 +91,7 @@ struct classifier { > > int n_rules; /* Total number of rules. */ > > struct hmap tables; /* Contains "struct cls_table"s. */ > > struct list tables_priority; /* Tables in descending priority order */ > > + struct hmap partitions; /* Contains "struct cls_partition"s. */ > > }; > > > > /* A set of rules that all have the same fields wildcarded. */ > > @@ -53,6 +103,7 @@ struct cls_table { > > int n_table_rules; /* Number of rules, including duplicates. */ > > unsigned int max_priority; /* Max priority of any rule in the table. */ > > unsigned int max_count; /* Count of max_priority rules. */ > > + tag_type tag; /* Tag generated from mask for > > partitioning. */ > > }; > > > > /* Returns true if 'table' is a "catch-all" table that will match every > > @@ -69,6 +120,17 @@ struct cls_rule { > > struct list list; /* List of identical, lower-priority rules. > > */ > > struct minimatch match; /* Matching rule. */ > > unsigned int priority; /* Larger numbers are higher priorities. */ > > + struct cls_partition *partition; > > +}; > > + > > +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ > > metadata > > + * field) with tags for the "cls_table"s that contain rules that match that > > + * metadata value. */ > > +struct cls_partition { > > + struct hmap_node hmap_node; /* In struct classifier's 'partitions' > > hmap. */ > > + ovs_be64 metadata; /* metadata value for this partition. */ > > + unsigned int n_refs; /* # of flows that refer to this > > partition. */ > > + tag_type tags; /* OR of each included flow's cls_table > > tag. */ > > }; > > > > void cls_rule_init(struct cls_rule *, const struct match *, > > diff --git a/lib/flow.h b/lib/flow.h > > index 2c027ed..89c52b1 100644 > > --- a/lib/flow.h > > +++ b/lib/flow.h > > @@ -21,6 +21,7 @@ > > #include <stdbool.h> > > #include <stdint.h> > > #include <string.h> > > +#include "byte-order.h" > > #include "openflow/nicira-ext.h" > > #include "openflow/openflow.h" > > #include "hash.h" > > @@ -327,6 +328,7 @@ void miniflow_expand(const struct miniflow *, struct > > flow *); > > > > uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs); > > uint16_t miniflow_get_vid(const struct miniflow *); > > +static inline ovs_be64 miniflow_get_metadata(const struct miniflow *); > > > > bool miniflow_equal(const struct miniflow *a, const struct miniflow *b); > > bool miniflow_equal_in_minimask(const struct miniflow *a, > > @@ -359,11 +361,36 @@ void minimask_expand(const struct minimask *, struct > > flow_wildcards *); > > > > uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs); > > uint16_t minimask_get_vid_mask(const struct minimask *); > > +static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *); > > > > bool minimask_equal(const struct minimask *a, const struct minimask *b); > > uint32_t minimask_hash(const struct minimask *, uint32_t basis); > > > > bool minimask_has_extra(const struct minimask *, const struct minimask *); > > bool minimask_is_catchall(const struct minimask *); > > + > > +/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */ > > +static inline ovs_be64 > > +miniflow_get_metadata(const struct miniflow *flow) > > +{ > > + enum { MD_OFS = offsetof(struct flow, metadata) }; > > + BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0); > > + ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4); > > + ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1); > > + > > + return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo)); > > +} > > + > > +/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'. > > + * > > + * The return value is all-1-bits if 'mask' matches on the whole value of > > the > > + * metadata field, all-0-bits if 'mask' entirely wildcards the metadata > > field, > > + * or some other value if the metadata field is partially matched, > > partially > > + * wildcarded. */ > > +static inline ovs_be64 > > +minimask_get_metadata_mask(const struct minimask *mask) > > +{ > > + return miniflow_get_metadata(&mask->masks); > > +} > > > > #endif /* flow.h */ > > diff --git a/lib/tag.h b/lib/tag.h > > index 9d6b4aa..6fec24b 100644 > > --- a/lib/tag.h > > +++ b/lib/tag.h > > @@ -1,5 +1,5 @@ > > /* > > - * Copyright (c) 2008, 2011, 2012 Nicira, Inc. > > + * Copyright (c) 2008, 2011, 2012, 2013 Nicira, Inc. > > * > > * Licensed under the Apache License, Version 2.0 (the "License"); > > * you may not use this file except in compliance with the License. > > @@ -68,6 +68,12 @@ > > /* Represents a tag, or the combination of 0 or more tags. */ > > typedef uint32_t tag_type; > > > > +/* A 'tag_type' value that intersects every tag. */ > > +#define TAG_ALL UINT32_MAX > > + > > +/* An arbitrary tag. */ > > +#define TAG_ARBITRARY UINT32_C(3) > > + > > tag_type tag_create_random(void); > > tag_type tag_create_deterministic(uint32_t seed); > > static inline bool tag_intersects(tag_type, tag_type); > > -- > > 1.7.2.5 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev