We have a controller that puts many rules with different reg0 values into the flow table, where reg0 is used (by "resubmit"s) to distinguish stages in a pipeline. Thus, any given flow only needs to be hashed into classifier "cls_table"s that contain a match for the flow's reg0 value. This commit optimizes the classifier lookup by (probabilistically) skipping the "cls_table"s that can't possibly match.
This speeds up flow setup performance with the controller in question by about 19%. Bug #14282. Signed-off-by: Ben Pfaff <b...@nicira.com> --- lib/classifier.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++--- lib/classifier.h | 72 +++++++++++++++++++++++++++++++++++--- lib/flow.h | 27 ++++++++++++++- lib/tag.h | 8 ++++- 4 files changed, 194 insertions(+), 13 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index d1fe524..8dc0fdc 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,6 +135,7 @@ classifier_init(struct classifier *cls) { cls->n_rules = 0; hmap_init(&cls->tables); + hmap_init(&cls->partitions); } /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the @@ -143,12 +144,20 @@ void classifier_destroy(struct classifier *cls) { if (cls) { + struct cls_table *partition, *next_partition; struct cls_table *table, *next_table; HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) { destroy_table(cls, table); } hmap_destroy(&cls->tables); + + HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, + &cls->partitions) { + hmap_remove(&cls->partitions, &partition->hmap_node); + free(partition); + } + hmap_destroy(&cls->partitions); } } @@ -166,6 +175,39 @@ classifier_count(const struct classifier *cls) return cls->n_rules; } +static struct cls_partition * +find_partition(const struct classifier *cls, uint32_t reg0, uint32_t hash) +{ + struct cls_partition *partition; + + HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) { + if (partition->reg0 == reg0) { + return partition; + } + } + + return NULL; +} + +static struct cls_partition * +create_partition(struct classifier *cls, struct cls_table *table, + uint32_t reg0) +{ + uint32_t hash = hash_int(reg0, 0); + struct cls_partition *partition = find_partition(cls, reg0, hash); + if (partition) { + partition->n_refs++; + partition->tags |= table->tag; + } else { + partition = xmalloc(sizeof *partition); + partition->n_refs = 1; + partition->tags = table->tag; + partition->reg0 = reg0; + hmap_insert(&cls->partitions, &partition->hmap_node, hash); + } + return partition; +} + /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller * must not modify or free it. * @@ -192,8 +234,17 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule) old_rule = insert_rule(table, rule); if (!old_rule) { + if (minimask_get_reg_mask(&rule->match.mask, 0) == UINT32_MAX) { + uint32_t reg0 = miniflow_get_reg(&rule->match.flow, 0); + rule->partition = create_partition(cls, table, reg0); + } else { + rule->partition = NULL; + } + table->n_table_rules++; cls->n_rules++; + } else { + rule->partition = old_rule->partition; } return old_rule; } @@ -217,6 +268,7 @@ classifier_insert(struct classifier *cls, struct cls_rule *rule) void classifier_remove(struct classifier *cls, struct cls_rule *rule) { + struct cls_partition *partition; struct cls_rule *head; struct cls_table *table; @@ -234,6 +286,12 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule) hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node); } + partition = rule->partition; + if (partition && --partition->n_refs == 0) { + hmap_remove(&cls->partitions, &partition->hmap_node); + free(partition); + } + if (--table->n_table_rules == 0) { destroy_table(cls, table); } @@ -247,14 +305,41 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule) struct cls_rule * classifier_lookup(const struct classifier *cls, const struct flow *flow) { + const struct cls_partition *partition; struct cls_table *table; struct cls_rule *best; + tag_type tags; + + /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, then + * 'flow' cannot possibly match in 'table': + * + * - If flow->regs[0] maps to a given 'partition', then we can use + * 'tags' for 'partition->tags'. + * + * - If flow->regs[0] has no partition, then no rule in 'cls' has an + * exact-match for flow->regs[0]. That means that we don't need to + * search any table that includes flow->regs[0] in its mask. + * + * In either case, we always need to search any cls_tables that do not + * include flow->regs[0] in its mask. One way to do that would be to check + * the "cls_table"s explicitly for that, but that would require an extra + * branch per table. Instead, we mark such a cls_table's 'tags' as TAG_ALL + * and make sure that 'tags' is never empty. This means that 'tags' always + * intersects such a cls_table's 'tags', so we don't need a special case. + */ + partition = (hmap_is_empty(&cls->partitions) + ? NULL + : find_partition(cls, flow->regs[0], + hash_int(flow->regs[0], 0))); + tags = partition ? partition->tags : TAG_ARBITRARY; best = NULL; HMAP_FOR_EACH (table, hmap_node, &cls->tables) { - struct cls_rule *rule = find_match(table, flow); - if (rule && (!best || rule->priority > best->priority)) { - best = rule; + if (tag_intersects(tags, table->tag)) { + struct cls_rule *rule = find_match(table, flow); + if (rule && (!best || rule->priority > best->priority)) { + best = rule; + } } } return best; @@ -489,12 +574,17 @@ find_table(const struct classifier *cls, const struct minimask *mask) static struct cls_table * insert_table(struct classifier *cls, const struct minimask *mask) { + uint32_t hash = minimask_hash(mask, 0); struct cls_table *table; table = xzalloc(sizeof *table); hmap_init(&table->rules); minimask_clone(&table->mask, mask); - hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0)); + hmap_insert(&cls->tables, &table->hmap_node, hash); + + table->tag = (minimask_get_reg_mask(mask, 0) == UINT32_MAX + ? tag_create_deterministic(hash) + : TAG_ALL); return table; } diff --git a/lib/classifier.h b/lib/classifier.h index bc9db33..fdc7003 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,17 +19,65 @@ /* Flow classifier. * - * A classifier is a "struct classifier", - * a hash map from a set of wildcards to a "struct cls_table", - * a hash map from fixed field values to "struct cls_rule", - * which can contain a list of otherwise identical rules - * with lower priorities. + * Basic Design + * ============ + * + * Suppose that all the rules in a classifier had the same form. For example, + * suppose that they all matched on the source and destination Ethernet address + * and wildcarded all the other fields. Then the obvious way to implement a + * classifier would be a hash table on the source and destination Ethernet + * addresses. If new classification rules came along with a different form, + * you could add a second hash table that hashed on the fields matched in those + * rules. With two hash tables, you look up a given flow in each hash table. + * If there are no matches, the classifier didn't contain a match; if you find + * a match in one of them, that's the result; if you find a match in both of + * them, then the result is the rule with the higher priority. + * + * This is how the classifier works. In a "struct classifier", each form of + * "struct cls_rule" present (based on its ->match.mask) goes into a separate + * "struct cls_table". A lookup does a hash lookup in every "struct cls_table" + * in the classifier and returns the highest-priority match that it finds. + * + * One detail: a classifier can contain multiple rules that are identical other + * than their priority. When this happens, only the highest priority rule out + * of a group of otherwise identical rules is stored directly in the "struct + * cls_table", with the other almost-identical rules chained off a linked list + * inside that highest-priority rule. + * + * + * Partitioning + * ============ + * + * Suppose that a given classifier is being used to handle multiple stages in a + * pipeline using "resubmit", with reg0 distinguishing between the different + * stages. For example, reg0 value 1 might identify ingress rules, reg0 value + * 2 might identify ACLs, and reg0 value 3 might identify egress rules. Such a + * classifier is essentially partitioned into multiple sub-classifiers on the + * basis of the reg0 value. + * + * The classifier has a special optimization to speed up matching in this + * scenario: + * + * - Each cls_table that matches on reg0 gets a random tag (see tag.h). + * + * - For each reg0 value matched by any cls_rule, the classifier constructs + * a "struct cls_partition" indexed by the reg0 value. The cls_partition + * has a 'tags' member whose value is the bitwise-OR of the tags of each + * cls_table that contains any rule that matches on the cls_partition's + * reg0 value. + * + * Thus, a flow lookup can start by looking up the partition associated with + * the flow's reg0, and then skip over any cls_table whose 'tag' does not + * intersect the partition's 'tags'. (The flow must also be looked up in any + * cls_table that doesn't match on reg0. We handle that by giving any such + * cls_table TAG_ALL as its 'tags' so that it matches any tag.) */ #include "flow.h" #include "hmap.h" #include "list.h" #include "match.h" +#include "tag.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" @@ -41,6 +89,7 @@ extern "C" { struct classifier { int n_rules; /* Total number of rules. */ struct hmap tables; /* Contains "struct cls_table"s. */ + struct hmap partitions; /* Contains "struct cls_partition"s. */ }; /* A set of rules that all have the same fields wildcarded. */ @@ -49,6 +98,7 @@ struct cls_table { struct hmap rules; /* Contains "struct cls_rule"s. */ struct minimask mask; /* Wildcards for fields. */ int n_table_rules; /* Number of rules, including duplicates. */ + tag_type tag; /* Tag generated from mask for partitioning. */ }; /* Returns true if 'table' is a "catch-all" table that will match every @@ -65,6 +115,16 @@ struct cls_rule { struct list list; /* List of identical, lower-priority rules. */ struct minimatch match; /* Matching rule. */ unsigned int priority; /* Larger numbers are higher priorities. */ + struct cls_partition *partition; +}; + +/* Associates a reg0 value with tags for the "cls_table"s that contain rules + * that match that reg0 value. */ +struct cls_partition { + struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. */ + uint32_t reg0; /* reg0 value for this partition. */ + unsigned int n_refs; /* # of flows that refer to this partition. */ + tag_type tags; /* OR of each included flow's cls_table tag. */ }; void cls_rule_init(struct cls_rule *, const struct match *, diff --git a/lib/flow.h b/lib/flow.h index 8e79e62..b5c8827 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -251,6 +251,7 @@ void miniflow_expand(const struct miniflow *, struct flow *); uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs); uint16_t miniflow_get_vid(const struct miniflow *); +static inline uint32_t miniflow_get_reg(const struct miniflow *, int index); bool miniflow_equal(const struct miniflow *a, const struct miniflow *b); bool miniflow_equal_in_minimask(const struct miniflow *a, @@ -283,11 +284,35 @@ void minimask_expand(const struct minimask *, struct flow_wildcards *); uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs); uint16_t minimask_get_vid_mask(const struct minimask *); +static inline uint32_t minimask_get_reg_mask(const struct minimask *, + int index); bool minimask_equal(const struct minimask *a, const struct minimask *b); uint32_t minimask_hash(const struct minimask *, uint32_t basis); bool minimask_has_extra(const struct minimask *, const struct minimask *); bool minimask_is_catchall(const struct minimask *); + +/* Returns the value of register 'index' in 'flow'. */ +static inline uint32_t +miniflow_get_reg(const struct miniflow *flow, int index) +{ + enum { REG_OFS = offsetof(struct flow, regs) }; + + BUILD_ASSERT_DECL(REG_OFS % sizeof(uint32_t) == 0); + + return miniflow_get(flow, REG_OFS / sizeof(uint32_t) + index); +} + +/* Returns the mask for register 'index' in 'mask'. + * + * The return value is UINT32_MAX if 'mask' matches on the whole value of the + * register, 0 if 'mask' entirely wildcards the register, or some other value + * if the register is partially matched, partially wildcarded. */ +static inline uint32_t +minimask_get_reg_mask(const struct minimask *mask, int index) +{ + return miniflow_get_reg(&mask->masks, index); +} #endif /* flow.h */ diff --git a/lib/tag.h b/lib/tag.h index 2050de0..5d7c65b 100644 --- a/lib/tag.h +++ b/lib/tag.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2011 Nicira, Inc. + * Copyright (c) 2008, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,12 @@ /* Represents a tag, or the combination of 0 or more tags. */ typedef uint32_t tag_type; +/* A 'tag_type' value that intersects every tag. */ +#define TAG_ALL UINT32_MAX + +/* An arbitrary tag. */ +#define TAG_ARBITRARY UINT32_C(3) + tag_type tag_create_random(void); tag_type tag_create_deterministic(uint32_t seed); static inline bool tag_intersects(tag_type, tag_type); -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev