WARNING: This patch *only* compiles! NOT tested yet!

This patch adds initial IGMP snooping support.
I am sending it out early to get early review,
especially of the design of the code.

Any comments are welcome!

Cc: Ben Pfaff <b...@nicira.com>
Cc: Jesse Gross <je...@nicira.com>
Not-Yet-Signed-off-by: Cong Wang <amw...@redhat.com>

---
 include/sparse/netinet/in.h |    1 +
 lib/automake.mk             |    2 +
 lib/flow.c                  |   21 +++
 lib/flow.h                  |    3 +-
 lib/igmp-snooping.c         |  349 +++++++++++++++++++++++++++++++++++++++++++
 lib/igmp-snooping.h         |  102 +++++++++++++
 lib/learning-switch.c       |   11 ++
 lib/packets.h               |   15 ++
 ofproto/ofproto-dpif.c      |  195 +++++++++++++++++++++----
 ofproto/ofproto-provider.h  |    2 +
 ofproto/ofproto.c           |   10 ++
 ofproto/ofproto.h           |    2 +
 vswitchd/bridge.c           |    2 +
 13 files changed, 688 insertions(+), 27 deletions(-)
 create mode 100644 lib/igmp-snooping.c
 create mode 100644 lib/igmp-snooping.h

diff --git a/include/sparse/netinet/in.h b/include/sparse/netinet/in.h
index b3924c3..16af225 100644
--- a/include/sparse/netinet/in.h
+++ b/include/sparse/netinet/in.h
@@ -51,6 +51,7 @@ extern const struct in6_addr in6addr_any;
 #define IPPROTO_IP 0
 #define IPPROTO_HOPOPTS 0
 #define IPPROTO_ICMP 1
+#define IPPROTO_IGMP 2
 #define IPPROTO_TCP 6
 #define IPPROTO_UDP 17
 #define IPPROTO_ROUTING 43
diff --git a/lib/automake.mk b/lib/automake.mk
index ce3edc3..33af27a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -79,6 +79,8 @@ lib_libopenvswitch_a_SOURCES = \
        lib/lockfile.h \
        lib/mac-learning.c \
        lib/mac-learning.h \
+       lib/igmp-snooping.c \
+       lib/igmp-snooping.h \
        lib/match.c \
        lib/match.h \
        lib/memory.c \
diff --git a/lib/flow.c b/lib/flow.c
index 397bda1..9cca7d9 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -87,6 +87,12 @@ pull_icmp(struct ofpbuf *packet)
     return ofpbuf_try_pull(packet, ICMP_HEADER_LEN);
 }
 
+static struct igmp_header *
+pull_igmp(struct ofpbuf *packet)
+{
+    return ofpbuf_try_pull(packet, IGMP_HEADER_LEN);
+}
+
 static struct icmp6_hdr *
 pull_icmpv6(struct ofpbuf *packet)
 {
@@ -460,6 +466,13 @@ flow_extract_l3_onwards(struct ofpbuf *packet, struct flow *flow,
                         flow->tp_dst = htons(icmp->icmp_code);
                         packet->l7 = b.data;
                     }
+                } else if (flow->nw_proto == IPPROTO_IGMP) {
+                    const struct igmp_header *igmp = pull_igmp(&b);
+                    if (igmp) {
+                        flow->tp_src = htons(igmp->igmp_type);
+                        flow->tp_dst = htons(igmp->igmp_code);
+                        flow->igmp_group = igmp->group; /* Already be32. */
+                    }
                 }
             }
         }
@@ -925,6 +938,14 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
                 icmp->icmp_type = ntohs(flow->tp_src);
                 icmp->icmp_code = ntohs(flow->tp_dst);
                 icmp->icmp_csum = csum(icmp, ICMP_HEADER_LEN);
+            } else if (flow->nw_proto == IPPROTO_IGMP) {
+                struct igmp_header *igmp;
+
+                b->l4 = igmp = ofpbuf_put_zeros(b, sizeof *igmp);
+                igmp->igmp_type = ntohs(flow->tp_src);
+                igmp->igmp_code = ntohs(flow->tp_dst);
+                igmp->group = flow->igmp_group; /* Both are ovs_be32. */
+                igmp->igmp_csum = csum(igmp, IGMP_HEADER_LEN);
             }
         }
 
diff --git a/lib/flow.h b/lib/flow.h
index e6da480..4b540f9 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -98,6 +98,7 @@ struct flow {
     ovs_be16 encap_dl_type;     /* MPLS encapsulated Ethernet frame type */
     ovs_be16 tp_src;            /* TCP/UDP source port. */
     ovs_be16 tp_dst;            /* TCP/UDP destination port. */
+    ovs_be32 igmp_group;        /* IGMP multicast group. */
     uint8_t dl_src[6];          /* Ethernet source address. */
     uint8_t dl_dst[6];          /* Ethernet destination address. */
     uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
@@ -113,7 +114,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
 #define FLOW_U32S (sizeof(struct flow) / 4)
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
-BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 160 &&
+BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 168 &&
                   FLOW_WC_SEQ == 19);
 
 /* Represents the metadata fields of struct flow. */
diff --git a/lib/igmp-snooping.c b/lib/igmp-snooping.c
new file mode 100644
index 0000000..3e96b38
--- /dev/null
+++ b/lib/igmp-snooping.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "igmp-snooping.h"
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include "bitmap.h"
+#include "coverage.h"
+#include "hash.h"
+#include "list.h"
+#include "poll-loop.h"
+#include "tag.h"
+#include "timeval.h"
+#include "unaligned.h"
+#include "util.h"
+#include "vlan-bitmap.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(igmp_snooping);
+
+COVERAGE_DEFINE(igmp_snooping_learned);
+COVERAGE_DEFINE(igmp_snooping_expired);
+
+/* Returns the number of seconds since 'e' (within 'mdb') was last learned. */
+int
+mdb_entry_age(const struct igmp_mdb *mdb, const struct mdb_entry *e)
+{
+    time_t remaining = e->expires - time_now();
+    return mdb->idle_time - remaining;
+}
+
+static uint32_t
+igmp_mdb_hash(const struct igmp_mdb *mdb, ovs_be32 grp,
+               uint16_t vlan)
+{
+    return hash_3words(grp, vlan, mdb->secret);
+}
+
+static struct mdb_entry *
+mdb_entry_from_lru_node(struct list *list)
+{
+    return CONTAINER_OF(list, struct mdb_entry, lru_node);
+}
+
+/* Returns a tag that represents that 'grp' is on an unknown port in 'vlan'.
+ * (When we learn where 'grp' is in 'vlan', this allows flows that were
+ * flooded to be revalidated.) */
+static tag_type
+make_unknown_mdb_tag(const struct igmp_mdb *mdb,
+                     ovs_be32 grp, uint16_t vlan)
+{
+    return tag_create_deterministic(igmp_mdb_hash(mdb, grp, vlan));
+}
+
+static struct mdb_entry *
+mdb_entry_lookup(const struct igmp_mdb *mdb,
+                 ovs_be32 grp, uint16_t vlan)
+{
+    struct mdb_entry *e;
+
+    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, igmp_mdb_hash(mdb, grp, vlan),
+                             &mdb->table) {
+        if (e->vlan == vlan && e->group == grp) {
+            return e;
+        }
+    }
+    return NULL;
+}
+
+/* If the LRU list is not empty, stores the least-recently-used entry in '*e'
+ * and returns true.  Otherwise, if the LRU list is empty, stores NULL in '*e'
+ * and return false. */
+static bool
+get_lru(struct igmp_mdb *mdb, struct mdb_entry **e)
+{
+    if (!list_is_empty(&mdb->lrus)) {
+        *e = mdb_entry_from_lru_node(mdb->lrus.next);
+        return true;
+    } else {
+        *e = NULL;
+        return false;
+    }
+}
+
+static unsigned int
+normalize_idle_time(unsigned int idle_time)
+{
+    return (idle_time < 15 ? 15
+            : idle_time > 3600 ? 3600
+            : idle_time);
+}
+
+/* Creates and returns a new mdb table with an initial mdb aging
+ * timeout of 'idle_time' seconds and an initial maximum of MDB_DEFAULT_MAX
+ * entries. */
+struct igmp_mdb*
+igmp_snooping_create(unsigned int idle_time)
+{
+    struct igmp_mdb *mdb;
+
+    mdb = xmalloc(sizeof *mdb);
+    list_init(&mdb->lrus);
+    hmap_init(&mdb->table);
+    mdb->secret = random_uint32();
+    mdb->flood_vlans = NULL;
+    mdb->idle_time = normalize_idle_time(idle_time);
+    mdb->max_entries = MDB_DEFAULT_MAX;
+    return mdb;
+}
+
+/* Destroys mdb snooping table 'mdb'. */
+void
+igmp_snooping_destroy(struct igmp_mdb *mdb)
+{
+    if (mdb) {
+        struct mdb_entry *e, *next;
+
+        HMAP_FOR_EACH_SAFE (e, next, hmap_node, &mdb->table) {
+            hmap_remove(&mdb->table, &e->hmap_node);
+            free(e);
+        }
+        hmap_destroy(&mdb->table);
+
+        bitmap_free(mdb->flood_vlans);
+        free(mdb);
+    }
+}
+
+/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
+ * which all packets are flooded.  Returns true if the set has changed from the
+ * previous value. */
+bool
+igmp_mdb_set_flood_vlans(struct igmp_mdb *mdb,
+                             const unsigned long *bitmap)
+{
+    if (vlan_bitmap_equal(mdb->flood_vlans, bitmap)) {
+        return false;
+    } else {
+        bitmap_free(mdb->flood_vlans);
+        mdb->flood_vlans = vlan_bitmap_clone(bitmap);
+        return true;
+    }
+}
+
+/* Changes the mdb aging timeout of 'mdb' to 'idle_time' seconds. */
+void
+igmp_mdb_set_idle_time(struct igmp_mdb *mdb, unsigned int idle_time)
+{
+    idle_time = normalize_idle_time(idle_time);
+    if (idle_time != mdb->idle_time) {
+        struct mdb_entry *e;
+        int delta;
+
+        delta = (int) idle_time - (int) mdb->idle_time;
+        LIST_FOR_EACH (e, lru_node, &mdb->lrus) {
+            e->expires += delta;
+        }
+        mdb->idle_time = idle_time;
+    }
+}
+
+/* Sets the maximum number of entries in 'mdb' to 'max_entries', adjusting it
+ * to be within a reasonable range. */
+void
+igmp_mdb_set_max_entries(struct igmp_mdb *mdb, size_t max_entries)
+{
+    mdb->max_entries = (max_entries < 10 ? 10
+                       : max_entries > 1000 * 1000 ? 1000 * 1000
+                       : max_entries);
+}
+
+static bool
+is_learning_vlan(const struct igmp_mdb *mdb, uint16_t vlan)
+{
+    return !mdb->flood_vlans || !bitmap_is_set(mdb->flood_vlans, vlan);
+}
+
+/* Returns true if group address 'dst' may be learned on 'vlan' for 'mdb'.
+ * Returns false if 'mdb' is NULL, if 'dst' is not a multicast address, or if
+ * 'vlan' is configured on 'mdb' to flood all packets. */
+bool
+igmp_may_snoop(const struct igmp_mdb *mdb, const ovs_be32 dst, uint16_t vlan)
+{
+    return mdb && is_learning_vlan(mdb, vlan) && ip_is_multicast(dst);
+}
+
+/* Searches 'mdb' for and returns an mdb entry for 'grp' in 'vlan',
+ * inserting a new entry if necessary.  The caller must have already verified,
+ * by calling igmp_may_snoop(), that 'grp' and 'vlan' are
+ * learnable.
+ *
+ * If the returned mdb entry is new (as may be determined by calling
+ * mdb_entry_is_new()), then the caller must pass the new entry to
+ * igmp_mdb_changed().  The caller must also initialize the new entry's
+ * 'port' member.  Otherwise calling those functions is at the caller's
+ * discretion. */
+struct mdb_entry *
+igmp_mdb_insert(struct igmp_mdb *mdb, ovs_be32 grp, uint16_t vlan)
+{
+    struct mdb_entry *e;
+
+    e = mdb_entry_lookup(mdb, grp, vlan);
+    if (!e) {
+        uint32_t hash = igmp_mdb_hash(mdb, grp, vlan);
+
+        /* Table full: evict the least recently used entry, if any. */
+        if (hmap_count(&mdb->table) >= mdb->max_entries && get_lru(mdb, &e)) {
+            igmp_mdb_expire(mdb, e);
+        }
+
+        e = xmalloc(sizeof *e);
+        hmap_insert(&mdb->table, &e->hmap_node, hash);
+        e->group = grp;
+        e->vlan = vlan;
+        e->tag = 0;
+    } else {
+        list_remove(&e->lru_node);
+    }
+
+    /* Mark 'e' as recently used. */
+    list_push_back(&mdb->lrus, &e->lru_node);
+    e->expires = time_now() + mdb->idle_time;
+
+    return e;
+}
+
+/* Changes 'e''s tag to a new, randomly selected one, and returns the tag that
+ * would have been previously used for this entry's group and VLAN (either
+ * before 'e' was inserted, if it is new, or before its port was updated).
+ *
+ * The client should call this function after obtaining an mdb entry
+ * from igmp_mdb_insert(), if the entry is either new or if its learned
+ * port has changed. */
+tag_type
+igmp_mdb_changed(struct igmp_mdb *mdb, struct mdb_entry *e)
+{
+    tag_type old_tag = e->tag;
+
+    COVERAGE_INC(igmp_snooping_learned);
+
+    e->tag = tag_create_random();
+    return old_tag ? old_tag : make_unknown_mdb_tag(mdb, e->group, e->vlan);
+}
+
+/* Looks up group 'dst' for VLAN 'vlan' in 'mdb' and returns the associated
+ * mdb entry, if any.  If 'tag' is nonnull, then the tag that associates
+ * 'dst' and 'vlan' with its currently learned port will be OR'd into
+ * '*tag'. */
+struct mdb_entry *
+igmp_mdb_lookup(const struct igmp_mdb *mdb, ovs_be32 dst,
+                    uint16_t vlan, tag_type *tag)
+{
+    if (!ip_is_multicast(dst)) {
+        /* 'dst' is not a multicast address, so no entry is looked up for it
+         * and no tag is added. */
+        return NULL;
+    } else if (!is_learning_vlan(mdb, vlan)) {
+        /* We don't tag this property.  The set of learning VLANs changes so
+         * rarely that we revalidate every flow when it changes. */
+        return NULL;
+    } else {
+        struct mdb_entry *e = mdb_entry_lookup(mdb, dst, vlan);
+
+        ovs_assert(e == NULL || e->tag != 0);
+        if (tag) {
+            /* Tag either the learned port or the lack thereof. */
+            *tag |= e ? e->tag : make_unknown_mdb_tag(mdb, dst, vlan);
+        }
+        return e;
+    }
+}
+
+/* Expires 'e' from the 'mdb' hash table. */
+void
+igmp_mdb_expire(struct igmp_mdb *mdb, struct mdb_entry *e)
+{
+    hmap_remove(&mdb->table, &e->hmap_node);
+    list_remove(&e->lru_node);
+    free(e);
+}
+
+/* Expires all the mdb entries in 'mdb'.  If not NULL, the tags in 'mdb'
+ * are added to 'tags'.  Otherwise the tags in 'mdb' are discarded.  The client
+ * is responsible for revalidating any flows that depend on 'mdb', if
+ * necessary. */
+void
+igmp_mdb_flush(struct igmp_mdb *mdb, struct tag_set *tags)
+{
+    struct mdb_entry *e;
+    while (get_lru(mdb, &e)){
+        if (tags) {
+            tag_set_add(tags, e->tag);
+        }
+        igmp_mdb_expire(mdb, e);
+    }
+    hmap_shrink(&mdb->table);
+}
+
+/* Removes the entry for 'group' in 'vlan' from 'mdb', if there is one. */
+void
+igmp_mdb_delete(struct igmp_mdb *mdb, ovs_be32 group, int vlan)
+{
+    struct mdb_entry *mdb_entry = mdb_entry_lookup(mdb, group, vlan);
+    if (mdb_entry) {
+        igmp_mdb_expire(mdb, mdb_entry);
+    }
+}
+
+void
+igmp_snooping_run(struct igmp_mdb *mdb, struct tag_set *set)
+{
+    struct mdb_entry *e;
+    while (get_lru(mdb, &e)
+           && (hmap_count(&mdb->table) > mdb->max_entries
+               || time_now() >= e->expires)) {
+        COVERAGE_INC(igmp_snooping_expired);
+        if (set) {
+            tag_set_add(set, e->tag);
+        }
+        igmp_mdb_expire(mdb, e);
+    }
+}
+
+void
+igmp_snooping_wait(struct igmp_mdb *mdb)
+{
+    if (hmap_count(&mdb->table) > mdb->max_entries) {
+        poll_immediate_wake();
+    } else if (!list_is_empty(&mdb->lrus)) {
+        struct mdb_entry *e = mdb_entry_from_lru_node(mdb->lrus.next);
+        poll_timer_wait_until(e->expires * 1000LL);
+    }
+}
diff --git a/lib/igmp-snooping.h b/lib/igmp-snooping.h
new file mode 100644
index 0000000..d1f8fbb
--- /dev/null
+++ b/lib/igmp-snooping.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef IGMP_SNOOPING_H
+#define IGMP_SNOOPING_H 1
+
+#include <time.h>
+#include "hmap.h"
+#include "list.h"
+#include "packets.h"
+#include "tag.h"
+#include "timeval.h"
+
+struct igmp_mdb;
+
+/* Default maximum size of a mdb table, in entries. */
+#define MDB_DEFAULT_MAX 2048
+
+/* Time, in seconds, before expiring a mdb_entry due to inactivity. */
+#define MDB_ENTRY_DEFAULT_IDLE_TIME 300
+
+/* Multicast group entry. */
+struct mdb_entry {
+    struct hmap_node hmap_node; /* Node in a mdb hmap. */
+    struct list lru_node;       /* Element in 'lrus' list. */
+    time_t expires;             /* Expiration time. */
+    ovs_be32 group;             /* Known multicast group. */
+    uint16_t vlan;              /* VLAN tag. */
+    tag_type tag;               /* Tag for this entry. */
+
+    /* Learned port. */
+    union {
+        void *p;
+        int i;
+    } port;
+};
+
+int mdb_entry_age(const struct igmp_mdb *, const struct mdb_entry *);
+
+/* Returns true if igmp_mdb_insert() just created 'mdb' and the caller has
+ * not yet properly initialized it. */
+static inline bool mdb_entry_is_new(const struct mdb_entry *mdb)
+{
+    return !mdb->tag;
+}
+
+/* IGMP snooping table. */
+struct igmp_mdb {
+    struct hmap table;          /* Learning table. */
+    struct list lrus;           /* In-use entries, least recently used at the
+                                   front, most recently used at the back. */
+    uint32_t secret;            /* Secret for randomizing hash table. */
+    unsigned long *flood_vlans; /* Bitmap of learning disabled VLANs. */
+    unsigned int idle_time;     /* Max age before deleting an entry. */
+    size_t max_entries;         /* Max number of mdb entries. */
+};
+
+/* Basics. */
+struct igmp_mdb *igmp_snooping_create(unsigned int idle_time);
+void igmp_snooping_destroy(struct igmp_mdb *);
+
+void igmp_snooping_run(struct igmp_mdb *, struct tag_set *);
+void igmp_snooping_wait(struct igmp_mdb *);
+
+/* Configuration. */
+bool igmp_mdb_set_flood_vlans(struct igmp_mdb *,
+                                  const unsigned long *bitmap);
+void igmp_mdb_set_idle_time(struct igmp_mdb *, unsigned int idle_time);
+void igmp_mdb_set_max_entries(struct igmp_mdb *, size_t max_entries);
+
+/* Learning. */
+bool igmp_may_snoop(const struct igmp_mdb *mdb, const ovs_be32 dst,
+                    uint16_t vlan);
+
+struct mdb_entry *igmp_mdb_insert(struct igmp_mdb *mdb, ovs_be32 grp,
+                                  uint16_t vlan);
+void igmp_mdb_delete(struct igmp_mdb *mdb, ovs_be32 group, int vlan);
+
+tag_type igmp_mdb_changed(struct igmp_mdb *mdb, struct mdb_entry *e);
+
+struct mdb_entry *
+igmp_mdb_lookup(const struct igmp_mdb *mdb, ovs_be32 dst,
+                    uint16_t vlan, tag_type *tag);
+
+/* Flushing. */
+void igmp_mdb_expire(struct igmp_mdb *mdb, struct mdb_entry *e);
+void igmp_mdb_flush(struct igmp_mdb *mdb, struct tag_set *tags);
+
+#endif /* igmp-snooping.h */
diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index 4a95dc1..20c7e77 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -28,6 +28,7 @@
 #include "flow.h"
 #include "hmap.h"
 #include "mac-learning.h"
+#include "igmp-snooping.h"
 #include "ofpbuf.h"
 #include "ofp-actions.h"
 #include "ofp-errors.h"
@@ -70,6 +71,7 @@ struct lswitch {
     enum ofputil_protocol protocol;
     unsigned long long int datapath_id;
     struct mac_learning *ml;    /* NULL to act as hub instead of switch. */
+    struct igmp_mdb *mdb;
     struct flow_wildcards wc;   /* Wildcards to apply to flows. */
     bool action_normal;         /* Use OFPP_NORMAL? */
 
@@ -254,6 +256,10 @@ lswitch_run(struct lswitch *sw)
         mac_learning_run(sw->ml, NULL);
     }
 
+    if (sw->mdb) {
+        igmp_snooping_run(sw->mdb, NULL);
+    }
+
     rconn_run(sw->rconn);
 
     if (sw->state == S_CONNECTING) {
@@ -285,6 +291,11 @@ lswitch_wait(struct lswitch *sw)
     if (sw->ml) {
         mac_learning_wait(sw->ml);
     }
+
+    if (sw->mdb) {
+        igmp_snooping_wait(sw->mdb);
+    }
+
     rconn_run_wait(sw->rconn);
     rconn_recv_wait(sw->rconn);
 }
diff --git a/lib/packets.h b/lib/packets.h
index 0f97fe6..5c4995a 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -453,6 +453,21 @@ struct icmp_header {
 };
 BUILD_ASSERT_DECL(ICMP_HEADER_LEN == sizeof(struct icmp_header));
 
+#define IGMP_HEADER_LEN 8
+struct igmp_header {
+    uint8_t igmp_type;
+    uint8_t igmp_code;
+    ovs_be16 igmp_csum;
+    ovs_be32 group;
+};
+BUILD_ASSERT_DECL(IGMP_HEADER_LEN == sizeof(struct igmp_header));
+
+#define IGMP_HOST_MEMBERSHIP_QUERY      0x11    /* From RFC1112 */
+#define IGMP_HOST_MEMBERSHIP_REPORT     0x12    /* Ditto */
+#define IGMPV2_HOST_MEMBERSHIP_REPORT   0x16    /* V2 version of 0x12 */
+#define IGMP_HOST_LEAVE_MESSAGE         0x17
+#define IGMPV3_HOST_MEMBERSHIP_REPORT   0x22    /* V3 version of 0x12 */
+
 #define UDP_HEADER_LEN 8
 struct udp_header {
     ovs_be16 udp_src;
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 7035530..ec44e41 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -33,6 +33,7 @@
 #include "lacp.h"
 #include "learn.h"
 #include "mac-learning.h"
+#include "igmp-snooping.h"
 #include "meta-flow.h"
 #include "multipath.h"
 #include "netdev-vport.h"
@@ -656,6 +657,7 @@ struct ofproto_dpif {
     struct dpif_sflow *sflow;
     struct hmap bundles;        /* Contains "struct ofbundle"s. */
     struct mac_learning *ml;
+    struct igmp_mdb *mdb;
     struct ofmirror *mirrors[MAX_MIRRORS];
     bool has_mirrors;
     bool has_bonded_bundles;
@@ -1209,6 +1211,7 @@ construct(struct ofproto *ofproto_)
     ofproto->stp = NULL;
     hmap_init(&ofproto->bundles);
     ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
+    ofproto->mdb = igmp_snooping_create(MDB_ENTRY_DEFAULT_IDLE_TIME);
     for (i = 0; i < MAX_MIRRORS; i++) {
         ofproto->mirrors[i] = NULL;
     }
@@ -1502,6 +1505,7 @@ wait(struct ofproto *ofproto_)
         netflow_wait(ofproto->netflow);
     }
     mac_learning_wait(ofproto->ml);
+    igmp_snooping_wait(ofproto->mdb);
     stp_wait(ofproto);
     if (ofproto->backer->need_revalidate) {
         /* Shouldn't happen, but if it does just go around again. */
@@ -2794,6 +2798,8 @@ mirror_set(struct ofproto *ofproto_, void *aux,
     ofproto->has_mirrors = true;
     mac_learning_flush(ofproto->ml,
                        &ofproto->backer->revalidate_set);
+    igmp_mdb_flush(ofproto->mdb,
+                       &ofproto->backer->revalidate_set);
     mirror_update_dups(ofproto);
 
     return 0;
@@ -2814,6 +2820,7 @@ mirror_destroy(struct ofmirror *mirror)
     ofproto = mirror->ofproto;
     ofproto->backer->need_revalidate = REV_RECONFIGURE;
     mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set);
+    igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set);
 
     mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
     HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
@@ -2866,6 +2873,11 @@ set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
         mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set);
     }
+
+    if (igmp_mdb_set_flood_vlans(ofproto->mdb, flood_vlans)) {
+        igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set);
+    }
+
     return 0;
 }
 
@@ -2892,6 +2904,15 @@ set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
     mac_learning_set_idle_time(ofproto->ml, idle_time);
     mac_learning_set_max_entries(ofproto->ml, max_entries);
 }
+
+static void
+set_mdb_config(struct ofproto *ofproto_, unsigned int idle_time,
+                     size_t max_entries)
+{
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+    igmp_mdb_set_idle_time(ofproto->mdb, idle_time);
+    igmp_mdb_set_max_entries(ofproto->mdb, max_entries);
+}
 
 /* Ports. */
 
@@ -7011,6 +7032,44 @@ update_learning_table(struct ofproto_dpif *ofproto,
     }
 }
 
+static void
+update_mdb_table(struct ofproto_dpif *ofproto,
+                      const struct flow *flow, int vlan,
+                      struct ofbundle *in_bundle)
+{
+    struct mdb_entry *mdb_entry;
+
+    /* Don't learn the OFPP_NONE port. */
+    if (in_bundle == &ofpp_none_bundle) {
+        return;
+    }
+
+    if (!igmp_may_snoop(ofproto->mdb, flow->nw_dst, vlan)) {
+        return;
+    }
+
+    if (ntohs(flow->tp_src) == IGMP_HOST_MEMBERSHIP_REPORT ||
+        ntohs(flow->tp_src) == IGMPV2_HOST_MEMBERSHIP_REPORT) {
+        mdb_entry = igmp_mdb_insert(ofproto->mdb, flow->igmp_group, vlan);
+
+        if (mdb_entry_is_new(mdb_entry) || mdb_entry->port.p != in_bundle) {
+            /* The log messages here could actually be useful in debugging,
+             * so keep the rate limit relatively high. */
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
+            VLOG_DBG_RL(&rl, "bridge %s: learned that "IP_FMT" is "
+                        "on port %s in VLAN %d",
+                        ofproto->up.name, IP_ARGS(flow->igmp_group),
+                        in_bundle->name, vlan);
+
+            mdb_entry->port.p = in_bundle;
+            tag_set_add(&ofproto->backer->revalidate_set,
+                        igmp_mdb_changed(ofproto->mdb, mdb_entry));
+        }
+    } else if (ntohs(flow->tp_src) == IGMP_HOST_LEAVE_MESSAGE) {
+        igmp_mdb_delete(ofproto->mdb, flow->igmp_group, vlan);
+    }
+}
+
 static struct ofbundle *
 lookup_input_bundle(const struct ofproto_dpif *ofproto, uint16_t in_port,
                     bool warn, struct ofport_dpif **in_ofportp)
@@ -7117,6 +7176,7 @@ xlate_normal(struct action_xlate_ctx *ctx)
     struct ofport_dpif *in_port;
     struct ofbundle *in_bundle;
     struct mac_entry *mac;
+    struct mdb_entry *mdb_entry;
     uint16_t vlan;
     uint16_t vid;
 
@@ -7169,32 +7229,62 @@ xlate_normal(struct action_xlate_ctx *ctx)
 
     /* Learn source MAC. */
     if (ctx->may_learn) {
-        update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle);
-    }
-
-    /* Determine output bundle. */
-    mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan,
-                              &ctx->tags);
-    if (mac) {
-        if (mac->port.p != in_bundle) {
-            xlate_report(ctx, "forwarding to learned port");
-            output_normal(ctx, mac->port.p, vlan);
+        if (eth_addr_is_multicast(ctx->flow.dl_dst) &&
+           (ctx->flow.nw_proto == IPPROTO_IGMP))
+            update_mdb_table(ctx->ofproto, &ctx->flow, vlan, in_bundle);
+        else
+            update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle);
+    }
+
+    if (eth_addr_is_multicast(ctx->flow.dl_dst)) {
+        mdb_entry = igmp_mdb_lookup(ctx->ofproto->mdb, ctx->flow.nw_dst, vlan,
+                                     &ctx->tags);
+        if (mdb_entry) {
+            if (mdb_entry->port.p != in_bundle) {
+                xlate_report(ctx, "forwarding to learned port");
+                output_normal(ctx, mdb_entry->port.p, vlan);
+            } else {
+                xlate_report(ctx, "learned port is input port, dropping");
+            }
         } else {
-            xlate_report(ctx, "learned port is input port, dropping");
+            struct ofbundle *bundle;
+
+            xlate_report(ctx, "no learned multicast group for destination, flooding");
+            HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) {
+                if (bundle != in_bundle
+                    && ofbundle_includes_vlan(bundle, vlan)
+                    && bundle->floodable
+                    && !bundle->mirror_out) {
+                    output_normal(ctx, bundle, vlan);
+                }
+            }
+            ctx->nf_output_iface = NF_OUT_FLOOD;
         }
     } else {
-        struct ofbundle *bundle;
-
-        xlate_report(ctx, "no learned MAC for destination, flooding");
-        HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) {
-            if (bundle != in_bundle
-                && ofbundle_includes_vlan(bundle, vlan)
-                && bundle->floodable
-                && !bundle->mirror_out) {
-                output_normal(ctx, bundle, vlan);
+        /* Determine output bundle. */
+        mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan,
+                                     &ctx->tags);
+        if (mac) {
+            if (mac->port.p != in_bundle) {
+                xlate_report(ctx, "forwarding to learned port");
+                output_normal(ctx, mac->port.p, vlan);
+            } else {
+                xlate_report(ctx, "learned port is input port, dropping");
             }
+        } else {
+            struct ofbundle *bundle;
+
+            xlate_report(ctx, "no learned MAC for destination, flooding");
+            HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) {
+                if (bundle != in_bundle
+                    && ofbundle_includes_vlan(bundle, vlan)
+                    && bundle->floodable
+                    && !bundle->mirror_out) {
+                    output_normal(ctx, bundle, vlan);
+                }
+            }
+            ctx->nf_output_iface = NF_OUT_FLOOD;
         }
-        ctx->nf_output_iface = NF_OUT_FLOOD;
     }
 }
 
@@ -7474,12 +7564,60 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
     }
 
     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
         struct ofbundle *bundle = e->port.p;
         ds_put_format(&ds, "%5d  %4d  "ETH_ADDR_FMT"  %3d\n",
                       ofbundle_get_a_port(bundle)->odp_port,
                       e->vlan, ETH_ADDR_ARGS(e->mac),
                       mac_entry_age(ofproto->ml, e));
     }
+    unixctl_command_reply(conn, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
+
+static void
+ofproto_unixctl_mdb_flush(struct unixctl_conn *conn, int argc,
+                          const char *argv[], void *aux OVS_UNUSED)
+{
+    struct ofproto_dpif *ofproto;
+
+    if (argc > 1) {
+        ofproto = ofproto_dpif_lookup(argv[1]);
+        if (!ofproto) {
+            unixctl_command_reply_error(conn, "no such bridge");
+            return;
+        }
+        igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set);
+    } else {
+        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+            igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set);
+        }
+    }
+
+    unixctl_command_reply(conn, "table successfully flushed");
+}
+
+static void
+ofproto_unixctl_mdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                         const char *argv[], void *aux OVS_UNUSED)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+    const struct ofproto_dpif *ofproto;
+    const struct mdb_entry *e;
+
+    ofproto = ofproto_dpif_lookup(argv[1]);
+    if (!ofproto) {
+        unixctl_command_reply_error(conn, "no such bridge");
+        return;
+    }
+
+    ds_put_cstr(&ds, " port  VLAN  GROUP                Age\n");
+    LIST_FOR_EACH (e, lru_node, &ofproto->mdb->lrus) {
+        struct ofbundle *bundle = e->port.p;
+        ds_put_format(&ds, "%5d  %4d  "IP_FMT"  %3d\n",
+                      ofbundle_get_a_port(bundle)->odp_port,
+                      e->vlan, IP_ARGS(e->group),
+                      mdb_entry_age(ofproto->mdb, e));
+    }
     unixctl_command_reply(conn, ds_cstr(&ds));
     ds_destroy(&ds);
@@ -8085,6 +8223,10 @@ ofproto_dpif_unixctl_init(void)
                              ofproto_unixctl_fdb_flush, NULL);
     unixctl_command_register("fdb/show", "bridge", 1, 1,
                              ofproto_unixctl_fdb_show, NULL);
+    unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
+                             ofproto_unixctl_mdb_flush, NULL);
+    unixctl_command_register("mdb/show", "bridge", 1, 1,
+                             ofproto_unixctl_mdb_show, NULL);
     unixctl_command_register("ofproto/clog", "", 0, 0,
                              ofproto_dpif_clog, NULL);
     unixctl_command_register("ofproto/unclog", "", 0, 0,
@@ -8388,4 +8530,5 @@ const struct ofproto_class ofproto_dpif_class = {
     forward_bpdu_changed,
     set_mac_table_config,
     set_realdev,
+    set_mdb_config,
 };
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 95bda33..c2f9f23 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -1314,6 +1314,8 @@ struct ofproto_class {
      * it. */
     int (*set_realdev)(struct ofport *ofport,
                        uint16_t realdev_ofp_port, int vid);
+    void (*set_mdb_config)(struct ofproto *ofproto,
+                                 unsigned int idle_time, size_t max_entries);
 };
 
 extern const struct ofproto_class ofproto_dpif_class;
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index a9c7e76..477df42 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -599,6 +599,16 @@ ofproto_set_mac_table_config(struct ofproto *ofproto, unsigned idle_time,
 }
 
 void
+ofproto_set_mdb_config(struct ofproto *ofproto, unsigned idle_time,
+                             size_t max_entries)
+{
+    if (ofproto->ofproto_class->set_mdb_config) {
+        ofproto->ofproto_class->set_mdb_config(ofproto, idle_time,
+                                                     max_entries);
+    }
+}
+
+void
 ofproto_set_dp_desc(struct ofproto *p, const char *dp_desc)
 {
     free(p->dp_desc);
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 3a66d1b..643b1b7 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -225,6 +225,8 @@ void ofproto_set_flow_eviction_threshold(struct ofproto *, unsigned threshold);
 void ofproto_set_forward_bpdu(struct ofproto *, bool forward_bpdu);
 void ofproto_set_mac_table_config(struct ofproto *, unsigned idle_time,
                                   size_t max_entries);
+void ofproto_set_mdb_config(struct ofproto *, unsigned idle_time,
+                                  size_t max_entries);
 void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc);
 int ofproto_set_snoops(struct ofproto *, const struct sset *snoops);
 int ofproto_set_netflow(struct ofproto *,
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index ab0ecd6..817e47f 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -32,6 +32,7 @@
 #include "lacp.h"
 #include "list.h"
 #include "mac-learning.h"
+#include "igmp-snooping.h"
 #include "meta-flow.h"
 #include "netdev.h"
 #include "ofp-print.h"
@@ -1510,6 +1511,7 @@ bridge_configure_mac_table(struct bridge *br)
                       : MAC_DEFAULT_MAX);
 
     ofproto_set_mac_table_config(br->ofproto, idle_time, mac_table_size);
+    ofproto_set_mdb_config(br->ofproto, idle_time, mac_table_size);
 }
 
 static void
-- 
1.7.7.6

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to