Thanks for the reminder.  They all look like good comments to me, but
I'm reposting the series with the goal of getting some performance
measurements.  (The 19% in the commit log is obsolete.)  If the
performance measurements are good, I'll revise it.

I'm going to repost the series again in a moment.  I'll mention this
in the commit log this time.

On Fri, Jun 28, 2013 at 07:49:49AM +0000, Rajahalme, Jarno (NSN - FI/Espoo) 
wrote:
> I sent some comments (mostly about comments it seems) earlier you may want to 
> revisit (or not):
> 
> http://openvswitch.org/pipermail/dev/2013-May/027297.html
> 
>  Jarno
> 
> On Jun 28, 2013, at 1:30 , ext Ben Pfaff wrote:
> 
> > We have a controller that puts many rules with different metadata values
> > into the flow table, where metadata is used (by "resubmit"s) to distinguish
> > stages in a pipeline.  Thus, any given flow only needs to be hashed into
> > classifier "cls_table"s that contain a match for the flow's metadata value.
> > This commit optimizes the classifier lookup by (probabilistically) skipping
> > the "cls_table"s that can't possibly match.
> > 
> > (The "metadata" referred to here is the OpenFlow 1.1+ "metadata" field,
> > which is a 64-bit field similar in purpose to the "registers" defined by
> > Open vSwitch.)
> > 
> > This speeds up flow setup performance with the controller in question by
> > about 19%.
> > 
> > Bug #14282.
> > Signed-off-by: Ben Pfaff <b...@nicira.com>
> > ---
> > lib/classifier.c |  116 
> > +++++++++++++++++++++++++++++++++++++++++++++++++----
> > lib/classifier.h |   74 +++++++++++++++++++++++++++++++---
> > lib/flow.h       |   27 ++++++++++++
> > lib/tag.h        |    8 +++-
> > 4 files changed, 209 insertions(+), 16 deletions(-)
> > 
> > diff --git a/lib/classifier.c b/lib/classifier.c
> > index bd202f6..0af6e17 100644
> > --- a/lib/classifier.c
> > +++ b/lib/classifier.c
> > @@ -41,7 +41,7 @@ static void update_tables_after_removal(struct classifier 
> > *,
> >                                        unsigned int del_priority);
> > 
> > static struct cls_rule *find_match(const struct cls_table *,
> > -                                   const struct flow *);
> > +                                   const struct flow *, tag_type tags);
> > static struct cls_rule *find_equal(struct cls_table *,
> >                                   const struct miniflow *, uint32_t hash);
> > static struct cls_rule *insert_rule(struct classifier *,
> > @@ -143,6 +143,7 @@ classifier_init(struct classifier *cls)
> >    cls->n_rules = 0;
> >    hmap_init(&cls->tables);
> >    list_init(&cls->tables_priority);
> > +    hmap_init(&cls->partitions);
> > }
> > 
> > /* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
> > @@ -151,12 +152,20 @@ void
> > classifier_destroy(struct classifier *cls)
> > {
> >    if (cls) {
> > +        struct cls_table *partition, *next_partition;
> >        struct cls_table *table, *next_table;
> > 
> >        HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) {
> >            destroy_table(cls, table);
> >        }
> >        hmap_destroy(&cls->tables);
> > +
> > +        HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node,
> > +                            &cls->partitions) {
> > +            hmap_remove(&cls->partitions, &partition->hmap_node);
> > +            free(partition);
> > +        }
> > +        hmap_destroy(&cls->partitions);
> >    }
> > }
> > 
> > @@ -174,6 +183,46 @@ classifier_count(const struct classifier *cls)
> >    return cls->n_rules;
> > }
> > 
> > +static uint32_t
> > +hash_metadata(ovs_be64 metadata_)
> > +{
> > +    uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
> > +    return hash_2words(metadata, metadata >> 32);
> > +}
> > +
> > +static struct cls_partition *
> > +find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t 
> > hash)
> > +{
> > +    struct cls_partition *partition;
> > +
> > +    HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) 
> > {
> > +        if (partition->metadata == metadata) {
> > +            return partition;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static struct cls_partition *
> > +create_partition(struct classifier *cls, struct cls_table *table,
> > +                 ovs_be64 metadata)
> > +{
> > +    uint32_t hash = hash_metadata(metadata);
> > +    struct cls_partition *partition = find_partition(cls, metadata, hash);
> > +    if (partition) {
> > +        partition->n_refs++;
> > +        partition->tags |= table->tag;
> > +    } else {
> > +        partition = xmalloc(sizeof *partition);
> > +        partition->n_refs = 1;
> > +        partition->tags = table->tag;
> > +        partition->metadata = metadata;
> > +        hmap_insert(&cls->partitions, &partition->hmap_node, hash);
> > +    }
> > +    return partition;
> > +}
> > +
> > /* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the 
> > caller
> > * must not modify or free it.
> > *
> > @@ -200,8 +249,17 @@ classifier_replace(struct classifier *cls, struct 
> > cls_rule *rule)
> > 
> >    old_rule = insert_rule(cls, table, rule);
> >    if (!old_rule) {
> > +        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) 
> > {
> > +            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
> > +            rule->partition = create_partition(cls, table, metadata);
> > +        } else {
> > +            rule->partition = NULL;
> > +        }
> > +
> >        table->n_table_rules++;
> >        cls->n_rules++;
> > +    } else {
> > +        rule->partition = old_rule->partition;
> >    }
> >    return old_rule;
> > }
> > @@ -225,6 +283,7 @@ classifier_insert(struct classifier *cls, struct 
> > cls_rule *rule)
> > void
> > classifier_remove(struct classifier *cls, struct cls_rule *rule)
> > {
> > +    struct cls_partition *partition;
> >    struct cls_rule *head;
> >    struct cls_table *table;
> > 
> > @@ -242,6 +301,12 @@ classifier_remove(struct classifier *cls, struct 
> > cls_rule *rule)
> >        hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node);
> >    }
> > 
> > +    partition = rule->partition;
> > +    if (partition && --partition->n_refs == 0) {
> > +        hmap_remove(&cls->partitions, &partition->hmap_node);
> > +        free(partition);
> > +    }
> > +
> >    if (--table->n_table_rules == 0) {
> >        destroy_table(cls, table);
> >    } else {
> > @@ -262,12 +327,38 @@ struct cls_rule *
> > classifier_lookup(const struct classifier *cls, const struct flow *flow,
> >                  struct flow_wildcards *wc)
> > {
> > +    const struct cls_partition *partition;
> >    struct cls_table *table;
> >    struct cls_rule *best;
> > +    tag_type tags;
> > +
> > +    /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, 
> > then
> > +     * 'flow' cannot possibly match in 'table':
> > +     *
> > +     *     - If flow->metadata maps to a given 'partition', then we can use
> > +     *       'tags' for 'partition->tags'.
> > +     *
> > +     *     - If flow->metadata has no partition, then no rule in 'cls' has 
> > an
> > +     *       exact-match for flow->metadata.  That means that we don't 
> > need to
> > +     *       search any table that includes flow->metadata in its mask.
> > +     *
> > +     * In either case, we always need to search any cls_tables that do not
> > +     * include flow->metadata in its mask.  One way to do that would be to
> > +     * check the "cls_table"s explicitly for that, but that would require 
> > an
> > +     * extra branch per table.  Instead, we mark such a cls_table's 'tags' 
> > as
> > +     * TAG_ALL and make sure that 'tags' is never empty.  This means that
> > +     * 'tags' always intersects such a cls_table's 'tags', so we don't 
> > need a
> > +     * special case.
> > +     */
> > +    partition = (hmap_is_empty(&cls->partitions)
> > +                 ? NULL
> > +                 : find_partition(cls, flow->metadata,
> > +                                  hash_metadata(flow->metadata)));
> > +    tags = partition ? partition->tags : TAG_ARBITRARY;
> > 
> >    best = NULL;
> >    LIST_FOR_EACH (table, list_node, &cls->tables_priority) {
> > -        struct cls_rule *rule = find_match(table, flow);
> > +        struct cls_rule *rule = find_match(table, flow, tags);
> > 
> >        if (wc) {
> >            flow_wildcards_fold_minimask(wc, &table->mask);
> > @@ -280,7 +371,7 @@ classifier_lookup(const struct classifier *cls, const 
> > struct flow *flow,
> >                     * can not find anything better. */
> >                    return best;
> >                }
> > -                rule = find_match(table, flow);
> > +                rule = find_match(table, flow, tags);
> >                if (wc) {
> >                    flow_wildcards_fold_minimask(wc, &table->mask);
> >                }
> > @@ -536,6 +627,7 @@ find_table(const struct classifier *cls, const struct 
> > minimask *mask)
> > static struct cls_table *
> > insert_table(struct classifier *cls, const struct minimask *mask)
> > {
> > +    uint32_t hash = minimask_hash(mask, 0);
> >    struct cls_table *table;
> > 
> >    table = xzalloc(sizeof *table);
> > @@ -543,6 +635,9 @@ insert_table(struct classifier *cls, const struct 
> > minimask *mask)
> >    minimask_clone(&table->mask, mask);
> >    hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0));
> >    list_push_back(&cls->tables_priority, &table->list_node);
> > +    table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
> > +                  ? tag_create_deterministic(hash)
> > +                  : TAG_ALL);
> > 
> >    return table;
> > }
> > @@ -648,14 +743,17 @@ update_tables_after_removal(struct classifier *cls, 
> > struct cls_table *table,
> > }
> > 
> > static struct cls_rule *
> > -find_match(const struct cls_table *table, const struct flow *flow)
> > +find_match(const struct cls_table *table, const struct flow *flow,
> > +           tag_type tags)
> > {
> > -    uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0);
> > -    struct cls_rule *rule;
> > +    if (tag_intersects(tags, table->tag)) {
> > +        uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0);
> > +        struct cls_rule *rule;
> > 
> > -    HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) {
> > -        if (minimatch_matches_flow(&rule->match, flow)) {
> > -            return rule;
> > +        HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) {
> > +            if (minimatch_matches_flow(&rule->match, flow)) {
> > +                return rule;
> > +            }
> >        }
> >    }
> > 
> > diff --git a/lib/classifier.h b/lib/classifier.h
> > index fdc3af7..07a2929 100644
> > --- a/lib/classifier.h
> > +++ b/lib/classifier.h
> > @@ -1,5 +1,5 @@
> > /*
> > - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
> > + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
> > *
> > * Licensed under the Apache License, Version 2.0 (the "License");
> > * you may not use this file except in compliance with the License.
> > @@ -19,17 +19,66 @@
> > 
> > /* Flow classifier.
> > *
> > - * A classifier is a "struct classifier",
> > - *      a hash map from a set of wildcards to a "struct cls_table",
> > - *              a hash map from fixed field values to "struct cls_rule",
> > - *                      which can contain a list of otherwise identical 
> > rules
> > - *                      with lower priorities.
> > + * Basic Design
> > + * ============
> > + *
> > + * Suppose that all the rules in a classifier had the same form.  For 
> > example,
> > + * suppose that they all matched on the source and destination Ethernet 
> > address
> > + * and wildcarded all the other fields.  Then the obvious way to implement 
> > a
> > + * classifier would be a hash table on the source and destination Ethernet
> > + * addresses.  If new classification rules came along with a different 
> > form,
> > + * you could add a second hash table that hashed on the fields matched in 
> > those
> > + * rules.  With two hash tables, you look up a given flow in each hash 
> > table.
> > + * If there are no matches, the classifier didn't contain a match; if you 
> > find
> > + * a match in one of them, that's the result; if you find a match in both 
> > of
> > + * them, then the result is the rule with the higher priority.
> > + *
> > + * This is how the classifier works.  In a "struct classifier", each form 
> > of
> > + * "struct cls_rule" present (based on its ->match.mask) goes into a 
> > separate
> > + * "struct cls_table".  A lookup does a hash lookup in every "struct 
> > cls_table"
> > + * in the classifier and returns the highest-priority match that it finds.
> > + *
> > + * One detail: a classifier can contain multiple rules that are identical 
> > other
> > + * than their priority.  When this happens, only the highest priority rule 
> > out
> > + * of a group of otherwise identical rules is stored directly in the 
> > "struct
> > + * cls_table", with the other almost-identical rules chained off a linked 
> > list
> > + * inside that highest-priority rule.
> > + *
> > + *
> > + * Partitioning
> > + * ============
> > + *
> > + * Suppose that a given classifier is being used to handle multiple stages 
> > in a
> > + * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ 
> > field
> > + * named "metadata") distinguishing between the different stages.  For 
> > example,
> > + * metadata value 1 might identify ingress rules, metadata value 2 might
> > + * identify ACLs, and metadata value 3 might identify egress rules.  Such a
> > + * classifier is essentially partitioned into multiple sub-classifiers on 
> > the
> > + * basis of the metadata value.
> > + *
> > + * The classifier has a special optimization to speed up matching in this
> > + * scenario:
> > + *
> > + *     - Each cls_table that matches on metadata gets a random tag (see 
> > tag.h).
> > + *
> > + *     - For each metadata value matched by any cls_rule, the classifier
> > + *       constructs a "struct cls_partition" indexed by the metadata value.
> > + *       The cls_partition has a 'tags' member whose value is the 
> > bitwise-OR of
> > + *       the tags of each cls_table that contains any rule that matches on 
> > the
> > + *       cls_partition's metadata value.
> > + *
> > + * Thus, a flow lookup can start by looking up the partition associated 
> > with
> > + * the flow's metadata, and then skip over any cls_table whose 'tag' does 
> > not
> > + * intersect the partition's 'tags'.  (The flow must also be looked up in 
> > any
> > + * cls_table that doesn't match on metadata.  We handle that by giving any 
> > such
> > + * cls_table TAG_ALL as its 'tags' so that it matches any tag.)
> > */
> > 
> > #include "flow.h"
> > #include "hmap.h"
> > #include "list.h"
> > #include "match.h"
> > +#include "tag.h"
> > #include "openflow/nicira-ext.h"
> > #include "openflow/openflow.h"
> > 
> > @@ -42,6 +91,7 @@ struct classifier {
> >    int n_rules;                /* Total number of rules. */
> >    struct hmap tables;         /* Contains "struct cls_table"s.  */
> >    struct list tables_priority; /* Tables in descending priority order */
> > +    struct hmap partitions;     /* Contains "struct cls_partition"s. */
> > };
> > 
> > /* A set of rules that all have the same fields wildcarded. */
> > @@ -53,6 +103,7 @@ struct cls_table {
> >    int n_table_rules;          /* Number of rules, including duplicates. */
> >    unsigned int max_priority;  /* Max priority of any rule in the table. */
> >    unsigned int max_count;     /* Count of max_priority rules. */
> > +    tag_type tag;               /* Tag generated from mask for 
> > partitioning. */
> > };
> > 
> > /* Returns true if 'table' is a "catch-all" table that will match every
> > @@ -69,6 +120,17 @@ struct cls_rule {
> >    struct list list;           /* List of identical, lower-priority rules. 
> > */
> >    struct minimatch match;     /* Matching rule. */
> >    unsigned int priority;      /* Larger numbers are higher priorities. */
> > +    struct cls_partition *partition;
> > +};
> > +
> > +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ 
> > metadata
> > + * field) with tags for the "cls_table"s that contain rules that match that
> > + * metadata value.  */
> > +struct cls_partition {
> > +    struct hmap_node hmap_node; /* In struct classifier's 'partitions' 
> > hmap. */
> > +    ovs_be64 metadata;          /* metadata value for this partition. */
> > +    unsigned int n_refs;        /* # of flows that refer to this 
> > partition. */
> > +    tag_type tags;              /* OR of each included flow's cls_table 
> > tag. */
> > };
> > 
> > void cls_rule_init(struct cls_rule *, const struct match *,
> > diff --git a/lib/flow.h b/lib/flow.h
> > index 2c027ed..89c52b1 100644
> > --- a/lib/flow.h
> > +++ b/lib/flow.h
> > @@ -21,6 +21,7 @@
> > #include <stdbool.h>
> > #include <stdint.h>
> > #include <string.h>
> > +#include "byte-order.h"
> > #include "openflow/nicira-ext.h"
> > #include "openflow/openflow.h"
> > #include "hash.h"
> > @@ -327,6 +328,7 @@ void miniflow_expand(const struct miniflow *, struct 
> > flow *);
> > 
> > uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs);
> > uint16_t miniflow_get_vid(const struct miniflow *);
> > +static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
> > 
> > bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
> > bool miniflow_equal_in_minimask(const struct miniflow *a,
> > @@ -359,11 +361,36 @@ void minimask_expand(const struct minimask *, struct 
> > flow_wildcards *);
> > 
> > uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs);
> > uint16_t minimask_get_vid_mask(const struct minimask *);
> > +static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *);
> > 
> > bool minimask_equal(const struct minimask *a, const struct minimask *b);
> > uint32_t minimask_hash(const struct minimask *, uint32_t basis);
> > 
> > bool minimask_has_extra(const struct minimask *, const struct minimask *);
> > bool minimask_is_catchall(const struct minimask *);
> > +
> > +/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */
> > +static inline ovs_be64
> > +miniflow_get_metadata(const struct miniflow *flow)
> > +{
> > +    enum { MD_OFS = offsetof(struct flow, metadata) };
> > +    BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0);
> > +    ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4);
> > +    ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1);
> > +
> > +    return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo));
> > +}
> > +
> > +/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
> > + *
> > + * The return value is all-1-bits if 'mask' matches on the whole value of 
> > the
> > + * metadata field, all-0-bits if 'mask' entirely wildcards the metadata 
> > field,
> > + * or some other value if the metadata field is partially matched, 
> > partially
> > + * wildcarded. */
> > +static inline ovs_be64
> > +minimask_get_metadata_mask(const struct minimask *mask)
> > +{
> > +    return miniflow_get_metadata(&mask->masks);
> > +}
> > 
> > #endif /* flow.h */
> > diff --git a/lib/tag.h b/lib/tag.h
> > index 9d6b4aa..6fec24b 100644
> > --- a/lib/tag.h
> > +++ b/lib/tag.h
> > @@ -1,5 +1,5 @@
> > /*
> > - * Copyright (c) 2008, 2011, 2012 Nicira, Inc.
> > + * Copyright (c) 2008, 2011, 2012, 2013 Nicira, Inc.
> > *
> > * Licensed under the Apache License, Version 2.0 (the "License");
> > * you may not use this file except in compliance with the License.
> > @@ -68,6 +68,12 @@
> > /* Represents a tag, or the combination of 0 or more tags. */
> > typedef uint32_t tag_type;
> > 
> > +/* A 'tag_type' value that intersects every tag. */
> > +#define TAG_ALL UINT32_MAX
> > +
> > +/* An arbitrary tag. */
> > +#define TAG_ARBITRARY UINT32_C(3)
> > +
> > tag_type tag_create_random(void);
> > tag_type tag_create_deterministic(uint32_t seed);
> > static inline bool tag_intersects(tag_type, tag_type);
> > -- 
> > 1.7.2.5
> > 
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > http://openvswitch.org/mailman/listinfo/dev
> 
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to