Yep this is the same thing.  I've successfully shown that this
algorithm is the best for our use case and was intending to implement
it myself.  However, for many rather complicated reasons it became
important to have a prototype implementation quickly, so Jarno
graciously offered to pound out the implementation.

On the issue of the algorithm choice.  We tried a bunch of things, but
when we simulated it against some real firewall rule sets, we found
that the simplest is the best: a decision tree.

Ethan

On Mon, Mar 10, 2014 at 6:41 PM, Ben Pfaff <b...@nicira.com> wrote:
> Is this the same as what you've been working on, Ethan? If not, is it
> complementary, or...?
>
> On Mar 10, 2014 6:39 PM, "Jarno Rajahalme" <jrajaha...@nicira.com> wrote:
>>
>> Intended to send this as an RFC,
>>
>>   Jarno
>>
>> On Mar 10, 2014, at 6:43 PM, Jarno Rajahalme <jrajaha...@nicira.com>
>> wrote:
>>
>> > This should optimize port masks for megaflows for typical port usage
>> > in matches.  Each subtable has it's own trie if the subtable matches
>> > any of the ports bits.  This trie is consulted only after failing
>> > lookup to determine the number of bits that needs to be unwildcarded
>> > to guarantee that any packet that should match on any of the other
>> > rules will NOT match this megaflow.
>> >
>> > This version is not configurable and is always on.
>> >
>> > Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>
>> > ---
>> > lib/classifier.c    |   90
>> > +++++++++++++++++++++++++++++++++++++++++++++++----
>> > lib/classifier.h    |    2 ++
>> > tests/classifier.at |    6 ++--
>> > 3 files changed, 89 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/lib/classifier.c b/lib/classifier.c
>> > index 30a91b7..c36ccf6 100644
>> > --- a/lib/classifier.c
>> > +++ b/lib/classifier.c
>> > @@ -30,6 +30,10 @@
>> >
>> > VLOG_DEFINE_THIS_MODULE(classifier);
>> >
>> > +/* Ports trie depends on both ports sharing the same ovs_be32. */
>> > +#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
>> > +BUILD_ASSERT_DECL((offsetof(struct flow, tp_dst) / 4) ==
>> > TP_PORTS_OFS32);
>> > +
>> > struct trie_ctx;
>> > static struct cls_subtable *find_subtable(const struct classifier *,
>> >                                           const struct minimask *);
>> > @@ -71,10 +75,16 @@ static void trie_init(struct classifier *, int
>> > trie_idx,
>> >                       const struct mf_field *);
>> > static unsigned int trie_lookup(const struct cls_trie *, const struct
>> > flow *,
>> >                                 unsigned int *checkbits);
>> > -
>> > +static unsigned int trie_lookup_value(const struct trie_node *,
>> > +                                      const ovs_be32 value[],
>> > +                                      unsigned int *checkbits);
>> > static void trie_destroy(struct trie_node *);
>> > static void trie_insert(struct cls_trie *, const struct cls_rule *, int
>> > mlen);
>> > +static void trie_insert_prefix(struct trie_node **, const ovs_be32
>> > *prefix,
>> > +                               int mlen);
>> > static void trie_remove(struct cls_trie *, const struct cls_rule *, int
>> > mlen);
>> > +static void trie_remove_prefix(struct trie_node **, const ovs_be32
>> > *prefix,
>> > +                               int mlen);
>> > static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t
>> > be32ofs,
>> >                                  unsigned int nbits);
>> > static bool mask_prefix_bits_set(const struct flow_wildcards *,
>> > @@ -389,6 +399,23 @@ classifier_replace(struct classifier *cls, struct
>> > cls_rule *rule)
>> >                 trie_insert(&cls->tries[i], rule,
>> > subtable->trie_plen[i]);
>> >             }
>> >         }
>> > +
>> > +        /* Ports trie. */
>> > +        if (subtable->ports_mask_len) {
>> > +            ovs_be32 ports, mask, masked_ports;
>> > +
>> > +            /* We mask the value to be inserted to always have the
>> > wildcarded
>> > +             * bits in known (zero) state, so we can include them in
>> > comparison
>> > +             * and they will always match (== their original value does
>> > not
>> > +             * matter). */
>> > +            ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow,
>> > +                                                     TP_PORTS_OFS32);
>> > +            mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
>> > +            masked_ports = ports & mask;
>> > +
>> > +            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
>> > +                               subtable->ports_mask_len);
>> > +        }
>> >     } else {
>> >         rule->partition = old_rule->partition;
>> >     }
>> > @@ -421,6 +448,17 @@ classifier_remove(struct classifier *cls, struct
>> > cls_rule *rule)
>> >
>> >     subtable = find_subtable(cls, &rule->match.mask);
>> >
>> > +    if (subtable->ports_mask_len) {
>> > +        ovs_be32 ports, mask, masked_ports;
>> > +
>> > +        ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow,
>> > +                                                 TP_PORTS_OFS32);
>> > +        mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
>> > +        masked_ports = ports & mask;
>> > +
>> > +        trie_remove_prefix(&subtable->ports_trie,
>> > +                           &masked_ports, subtable->ports_mask_len);
>> > +    }
>> >     for (i = 0; i < cls->n_tries; i++) {
>> >         if (subtable->trie_plen[i]) {
>> >             trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
>> > @@ -859,6 +897,11 @@ insert_subtable(struct classifier *cls, const
>> > struct minimask *mask)
>> >
>> > cls->tries[i].field);
>> >     }
>> >
>> > +    /* Ports trie. */
>> > +    subtable->ports_trie = NULL;
>> > +    subtable->ports_mask_len = 32
>> > +        - ctz32(ntohl((OVS_FORCE ovs_be32)minimask_get(mask,
>> > TP_PORTS_OFS32)));
>> > +
>> >     return subtable;
>> > }
>> >
>> > @@ -867,6 +910,9 @@ destroy_subtable(struct classifier *cls, struct
>> > cls_subtable *subtable)
>> > {
>> >     int i;
>> >
>> > +    /* Port tries. */
>> > +    trie_destroy(subtable->ports_trie);
>> > +
>> >     for (i = 0; i < subtable->n_indices; i++) {
>> >         hindex_destroy(&subtable->indices[i]);
>> >     }
>> > @@ -1121,6 +1167,23 @@ find_match_wc(const struct cls_subtable
>> > *subtable, const struct flow *flow,
>> >          * but it didn't match. */
>> >         rule = NULL;
>> >     }
>> > +    if (!rule && subtable->ports_mask_len) {
>> > +        /* Ports are always part of the final range, if any.
>> > +         * No match was found for the ports.  Use the ports trie to
>> > figure out
>> > +         * which ports bits to unwildcard. */
>> > +        unsigned int mbits;
>> > +        ovs_be32 value, mask;
>> > +
>> > +        mask = minimask_get(&subtable->mask, TP_PORTS_OFS32);
>> > +        value = ((ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
>> > +        trie_lookup_value(subtable->ports_trie, &value, &mbits);
>> > +
>> > +        ((ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
>> > +            mask & htonl(~0 << (32 - mbits));
>> > +
>> > +        ofs.start = TP_PORTS_OFS32;
>> > +        goto range_out;
>> > +    }
>> >  out:
>> >     /* Must unwildcard all the fields, as they were looked at. */
>> >     flow_wildcards_fold_minimask(wc, &subtable->mask);
>> > @@ -1513,14 +1576,18 @@ minimatch_get_prefix(const struct minimatch
>> > *match, const struct mf_field *mf)
>> > static void
>> > trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int
>> > mlen)
>> > {
>> > -    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match,
>> > trie->field);
>> > +    trie_insert_prefix(&trie->root,
>> > +                       minimatch_get_prefix(&rule->match, trie->field),
>> > mlen);
>> > +}
>> > +
>> > +static void
>> > +trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int
>> > mlen)
>> > +{
>> >     struct trie_node *node;
>> > -    struct trie_node **edge;
>> >     int ofs = 0;
>> >
>> >     /* Walk the tree. */
>> > -    for (edge = &trie->root;
>> > -         (node = *edge) != NULL;
>> > +    for (; (node = *edge) != NULL;
>> >          edge = trie_next_edge(node, prefix, ofs)) {
>> >         unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs,
>> > mlen);
>> >         ofs += eqbits;
>> > @@ -1561,16 +1628,25 @@ trie_insert(struct cls_trie *trie, const struct
>> > cls_rule *rule, int mlen)
>> > static void
>> > trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int
>> > mlen)
>> > {
>> > -    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match,
>> > trie->field);
>> > +    trie_remove_prefix(&trie->root,
>> > +                       minimatch_get_prefix(&rule->match, trie->field),
>> > mlen);
>> > +}
>> > +
>> > +/* 'mlen' must be the (non-zero) CIDR prefix length of the
>> > 'trie->field' mask
>> > + * in 'rule'. */
>> > +static void
>> > +trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int
>> > mlen)
>> > +{
>> >     struct trie_node *node;
>> >     struct trie_node **edges[sizeof(union mf_value) * 8];
>> >     int depth = 0, ofs = 0;
>> >
>> >     /* Walk the tree. */
>> > -    for (edges[depth] = &trie->root;
>> > +    for (edges[0] = root;
>> >          (node = *edges[depth]) != NULL;
>> >          edges[++depth] = trie_next_edge(node, prefix, ofs)) {
>> >         unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs,
>> > mlen);
>> > +
>> >         if (eqbits < node->nbits) {
>> >             /* Mismatch, nothing to be removed.  This should never
>> > happen, as
>> >              * only rules in the classifier are ever removed. */
>> > diff --git a/lib/classifier.h b/lib/classifier.h
>> > index c3c1c3b..a1f4e4a 100644
>> > --- a/lib/classifier.h
>> > +++ b/lib/classifier.h
>> > @@ -276,6 +276,8 @@ struct cls_subtable {
>> >     uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries.
>> > */
>> >     struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
>> >     unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in
>> > 'mask'. */
>> > +    struct trie_node *ports_trie; /* NULL if none. */
>> > +    int ports_mask_len;
>> > };
>> >
>> > /* Returns true if 'table' is a "catch-all" subtable that will match
>> > every
>> > diff --git a/tests/classifier.at b/tests/classifier.at
>> > index a46c526..96fb29b 100644
>> > --- a/tests/classifier.at
>> > +++ b/tests/classifier.at
>> > @@ -55,7 +55,7 @@ Datapath actions: drop
>> > ])
>> > AT_CHECK([ovs-appctl ofproto/trace br0
>> > 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'],
>> > [0], [stdout])
>> > AT_CHECK([tail -2 stdout], [0],
>> > -  [Megaflow:
>> > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79
>> > +  [Megaflow:
>> > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0
>> > Datapath actions: 2
>> > ])
>> > OVS_VSWITCHD_STOP
>> > @@ -78,6 +78,8 @@ AT_DATA([flows.txt], [dnl
>> > table=0 in_port=1
>> > priority=16,tcp,nw_dst=10.1.0.0/255.255.0.0,action=output(3)
>> > table=0 in_port=1
>> > priority=32,tcp,nw_dst=10.1.2.0/255.255.255.0,tp_src=79,action=output(2)
>> > table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop
>> > +table=0 in_port=1
>> > priority=33,tcp,nw_dst=10.1.2.15,tp_dst=8080,action=output(2)
>> > +table=0 in_port=1
>> > priority=33,tcp,nw_dst=10.1.2.15,tp_dst=192,action=output(2)
>> > table=0 in_port=1 priority=0,ip,action=drop
>> > table=0 in_port=2
>> > priority=16,tcp,nw_dst=192.168.0.0/255.255.0.0,action=output(1)
>> > table=0 in_port=2 priority=0,ip,action=drop
>> > @@ -102,7 +104,7 @@ Datapath actions: drop
>> > ])
>> > AT_CHECK([ovs-appctl ofproto/trace br0
>> > 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'],
>> > [0], [stdout])
>> > AT_CHECK([tail -2 stdout], [0],
>> > -  [Megaflow:
>> > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79
>> > +  [Megaflow:
>> > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0
>> > Datapath actions: 3
>> > ])
>> > OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
>> > --
>> > 1.7.10.4
>> >
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to