WARNING: This patch has *only* been compile-tested; it has NOT been run yet! This patch adds initial IGMP snooping support. I am sending it out early to get early review, especially of the code's design.
Any comments are welcome! Cc: Ben Pfaff <b...@nicira.com> Cc: Jesse Gross <je...@nicira.com> Not-Yet-Signed-off-by: Cong Wang <amw...@redhat.com> --- include/sparse/netinet/in.h | 1 + lib/automake.mk | 2 + lib/flow.c | 21 +++ lib/flow.h | 3 +- lib/igmp-snooping.c | 349 +++++++++++++++++++++++++++++++++++++++++++ lib/igmp-snooping.h | 102 +++++++++++++ lib/learning-switch.c | 11 ++ lib/packets.h | 15 ++ ofproto/ofproto-dpif.c | 195 +++++++++++++++++++++---- ofproto/ofproto-provider.h | 2 + ofproto/ofproto.c | 10 ++ ofproto/ofproto.h | 2 + vswitchd/bridge.c | 2 + 13 files changed, 688 insertions(+), 27 deletions(-) create mode 100644 lib/igmp-snooping.c create mode 100644 lib/igmp-snooping.h diff --git a/include/sparse/netinet/in.h b/include/sparse/netinet/in.h index b3924c3..16af225 100644 --- a/include/sparse/netinet/in.h +++ b/include/sparse/netinet/in.h @@ -51,6 +51,7 @@ extern const struct in6_addr in6addr_any; #define IPPROTO_IP 0 #define IPPROTO_HOPOPTS 0 #define IPPROTO_ICMP 1 +#define IPPROTO_IGMP 2 #define IPPROTO_TCP 6 #define IPPROTO_UDP 17 #define IPPROTO_ROUTING 43 diff --git a/lib/automake.mk b/lib/automake.mk index ce3edc3..33af27a 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -79,6 +79,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/lockfile.h \ lib/mac-learning.c \ lib/mac-learning.h \ + lib/igmp-snooping.c \ + lib/igmp-snooping.h \ lib/match.c \ lib/match.h \ lib/memory.c \ diff --git a/lib/flow.c b/lib/flow.c index 397bda1..9cca7d9 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -87,6 +87,12 @@ pull_icmp(struct ofpbuf *packet) return ofpbuf_try_pull(packet, ICMP_HEADER_LEN); } +static struct igmp_header * +pull_igmp(struct ofpbuf *packet) +{ + return ofpbuf_try_pull(packet, IGMP_HEADER_LEN); +} + static struct icmp6_hdr * pull_icmpv6(struct ofpbuf *packet) { @@ -460,6 +466,13 @@ flow_extract_l3_onwards(struct ofpbuf *packet, struct flow *flow, flow->tp_dst = htons(icmp->icmp_code); packet->l7 = b.data; } + } else if (flow->nw_proto == IPPROTO_IGMP) 
{ + const struct igmp_header *igmp = pull_igmp(&b); + if (igmp) { + flow->tp_src = htons(igmp->igmp_type); + flow->tp_dst = htons(igmp->igmp_code); + flow->igmp_group = igmp->group; /* 32-bit field, already in network byte order; htons() would truncate it. */ + } } } } @@ -925,6 +938,14 @@ flow_compose(struct ofpbuf *b, const struct flow *flow) icmp->icmp_type = ntohs(flow->tp_src); icmp->icmp_code = ntohs(flow->tp_dst); icmp->icmp_csum = csum(icmp, ICMP_HEADER_LEN); + } else if (flow->nw_proto == IPPROTO_IGMP) { + struct igmp_header *igmp; + + b->l4 = igmp = ofpbuf_put_zeros(b, sizeof *igmp); + igmp->igmp_type = ntohs(flow->tp_src); + igmp->igmp_code = ntohs(flow->tp_dst); + igmp->group = flow->igmp_group; /* Both sides are ovs_be32; no byte swap needed. */ + igmp->igmp_csum = csum(igmp, IGMP_HEADER_LEN); } } diff --git a/lib/flow.h b/lib/flow.h index e6da480..4b540f9 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -98,6 +98,7 @@ struct flow { ovs_be16 encap_dl_type; /* MPLS encapsulated Ethernet frame type */ ovs_be16 tp_src; /* TCP/UDP source port. */ ovs_be16 tp_dst; /* TCP/UDP destination port. */ + ovs_be32 igmp_group; /* IGMP multicast group. */ uint8_t dl_src[6]; /* Ethernet source address. */ uint8_t dl_dst[6]; /* Ethernet destination address. */ uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ @@ -113,7 +114,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); #define FLOW_U32S (sizeof(struct flow) / 4) /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ -BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 160 && +BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 168 && FLOW_WC_SEQ == 19); /* Represents the metadata fields of struct flow. */ diff --git a/lib/igmp-snooping.c b/lib/igmp-snooping.c new file mode 100644 index 0000000..3e96b38 --- /dev/null +++ b/lib/igmp-snooping.c @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2013 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include "igmp-snooping.h" + +#include <inttypes.h> +#include <stdlib.h> + +#include "bitmap.h" +#include "coverage.h" +#include "hash.h" +#include "list.h" +#include "poll-loop.h" +#include "tag.h" +#include "timeval.h" +#include "unaligned.h" +#include "util.h" +#include "vlan-bitmap.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(igmp_snooping); + +COVERAGE_DEFINE(igmp_snooping_learned); +COVERAGE_DEFINE(igmp_snooping_expired); + +/* Returns the number of seconds since 'e' (within 'mdb') was last learned. */ +int +mdb_entry_age(const struct igmp_mdb *mdb, const struct mdb_entry *e) +{ + time_t remaining = e->expires - time_now(); + return mdb->idle_time - remaining; +} + +static uint32_t +igmp_mdb_hash(const struct igmp_mdb *mdb, ovs_be32 grp, + uint16_t vlan) +{ + return hash_3words(grp, vlan, mdb->secret); +} + +static struct mdb_entry * +mdb_entry_from_lru_node(struct list *list) +{ + return CONTAINER_OF(list, struct mdb_entry, lru_node); +} + +/* Returns a tag that represents that 'grp' is on an unknown port in 'vlan'. + * (When we learn where 'grp' is in 'vlan', this allows flows that were + * flooded to be revalidated.) 
*/ +static tag_type +make_unknown_mdb_tag(const struct igmp_mdb *mdb, + ovs_be32 grp, uint16_t vlan) +{ + return tag_create_deterministic(igmp_mdb_hash(mdb, grp, vlan)); +} + +static struct mdb_entry * +mdb_entry_lookup(const struct igmp_mdb *mdb, + ovs_be32 grp, uint16_t vlan) +{ + struct mdb_entry *e; + + HMAP_FOR_EACH_WITH_HASH (e, hmap_node, igmp_mdb_hash(mdb, grp, vlan), + &mdb->table) { + if (e->vlan == vlan && e->group == grp) { + return e; + } + } + return NULL; +} + +/* If the LRU list is not empty, stores the least-recently-used entry in '*e' + * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e' + * and return false. */ +static bool +get_lru(struct igmp_mdb *mdb, struct mdb_entry **e) +{ + if (!list_is_empty(&mdb->lrus)) { + *e = mdb_entry_from_lru_node(mdb->lrus.next); + return true; + } else { + *e = NULL; + return false; + } +} + +static unsigned int +normalize_idle_time(unsigned int idle_time) +{ + return (idle_time < 15 ? 15 + : idle_time > 3600 ? 3600 + : idle_time); +} + +/* Creates and returns a new mdb table with an initial mdb aging + * timeout of 'idle_time' seconds and an initial maximum of MDB_DEFAULT_MAX + * entries. */ +struct igmp_mdb* +igmp_snooping_create(unsigned int idle_time) +{ + struct igmp_mdb *mdb; + + mdb = xmalloc(sizeof *mdb); + list_init(&mdb->lrus); + hmap_init(&mdb->table); + mdb->secret = random_uint32(); + mdb->flood_vlans = NULL; + mdb->idle_time = normalize_idle_time(idle_time); + mdb->max_entries = MDB_DEFAULT_MAX; + return mdb; +} + +/* Destroys mdb snooping table 'mdb'. */ +void +igmp_snooping_destroy(struct igmp_mdb *mdb) +{ + if (mdb) { + struct mdb_entry *e, *next; + + HMAP_FOR_EACH_SAFE (e, next, hmap_node, &mdb->table) { + hmap_remove(&mdb->table, &e->hmap_node); + free(e); + } + hmap_destroy(&mdb->table); + + bitmap_free(mdb->flood_vlans); + free(mdb); + } +} + +/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on + * which all packets are flooded. 
Returns true if the set has changed from the + * previous value. */ +bool +igmp_mdb_set_flood_vlans(struct igmp_mdb *mdb, + const unsigned long *bitmap) +{ + if (vlan_bitmap_equal(mdb->flood_vlans, bitmap)) { + return false; + } else { + bitmap_free(mdb->flood_vlans); + mdb->flood_vlans = vlan_bitmap_clone(bitmap); + return true; + } +} + +/* Changes the mdb aging timeout of 'mdb' to 'idle_time' seconds. */ +void +igmp_mdb_set_idle_time(struct igmp_mdb *mdb, unsigned int idle_time) +{ + idle_time = normalize_idle_time(idle_time); + if (idle_time != mdb->idle_time) { + struct mdb_entry *e; + int delta; + + delta = (int) idle_time - (int) mdb->idle_time; + LIST_FOR_EACH (e, lru_node, &mdb->lrus) { + e->expires += delta; + } + mdb->idle_time = idle_time; + } +} + +/* Sets the maximum number of entries in 'mdb' to 'max_entries', adjusting it + * to be within a reasonable range. */ +void +igmp_mdb_set_max_entries(struct igmp_mdb *mdb, size_t max_entries) +{ + mdb->max_entries = (max_entries < 10 ? 10 + : max_entries > 1000 * 1000 ? 1000 * 1000 + : max_entries); +} + +static bool +is_learning_vlan(const struct igmp_mdb *mdb, uint16_t vlan) +{ + return !mdb->flood_vlans || !bitmap_is_set(mdb->flood_vlans, vlan); +} + +/* Returns true if 'dst' may be learned on 'vlan' for 'mdb'. + * Returns false if 'mdb' is NULL, if 'dst' is not an IP multicast address, or if + * 'vlan' is configured on 'mdb' to flood all packets. */ +bool +igmp_may_snoop(const struct igmp_mdb *mdb, const ovs_be32 dst, uint16_t vlan) +{ + return mdb && is_learning_vlan(mdb, vlan) && ip_is_multicast(dst); +} + +/* Searches 'mdb' for and returns an mdb entry for 'grp' in 'vlan', + * inserting a new entry if necessary. The caller must have already verified, + * by calling igmp_may_snoop(), that 'grp' and 'vlan' are + * learnable. + * + * If the returned mdb entry is new (as may be determined by calling + * mdb_entry_is_new()), then the caller must pass the new entry to + * igmp_mdb_changed().
The caller must also initialize the new entry's + * 'port' member. Otherwise calling those functions is at the caller's + * discretion. */ +struct mdb_entry * +igmp_mdb_insert(struct igmp_mdb *mdb, ovs_be32 grp, uint16_t vlan) +{ + struct mdb_entry *e; + + e = mdb_entry_lookup(mdb, grp, vlan); + if (!e) { + uint32_t hash = igmp_mdb_hash(mdb, grp, vlan); + + if (hmap_count(&mdb->table) >= mdb->max_entries) { + get_lru(mdb, &e); + igmp_mdb_expire(mdb, e); + } + + e = xmalloc(sizeof *e); + hmap_insert(&mdb->table, &e->hmap_node, hash); + e->group = grp; + e->vlan = vlan; + e->tag = 0; + } else { + list_remove(&e->lru_node); + } + + /* Mark 'e' as recently used. */ + list_push_back(&mdb->lrus, &e->lru_node); + e->expires = time_now() + mdb->idle_time; + + return e; +} + +/* Changes 'e''s tag to a new, randomly selected one, and returns the tag that + * would have been previously used for this entry's group and VLAN (either before + * 'e' was inserted, if it is new, or otherwise before its port was updated.) + * + * The client should call this function after obtaining a mdb entry + * from igmp_mdb_insert(), if the entry is either new or if its learned + * port has changed. */ +tag_type +igmp_mdb_changed(struct igmp_mdb *mdb, struct mdb_entry *e) +{ + tag_type old_tag = e->tag; + + COVERAGE_INC(igmp_snooping_learned); + + e->tag = tag_create_random(); + return old_tag ? old_tag : make_unknown_mdb_tag(mdb, e->group, e->vlan); +} + +/* Looks up group 'dst' for VLAN 'vlan' in 'mdb' and returns the associated + * mdb entry, if any. If 'tag' is nonnull, then the tag that associates + * 'dst' and 'vlan' with its currently learned port will be OR'd into + * '*tag'. */ +struct mdb_entry * +igmp_mdb_lookup(const struct igmp_mdb *mdb, ovs_be32 dst, + uint16_t vlan, tag_type *tag) +{ + if (!ip_is_multicast(dst)) { + /* No tag because the treatment of multicast destinations never + * changes. 
*/ + return NULL; + } else if (!is_learning_vlan(mdb, vlan)) { + /* We don't tag this property. The set of learning VLANs changes so + * rarely that we revalidate every flow when it changes. */ + return NULL; + } else { + struct mdb_entry *e = mdb_entry_lookup(mdb, dst, vlan); + + ovs_assert(e == NULL || e->tag != 0); + if (tag) { + /* Tag either the learned port or the lack thereof. */ + *tag |= e ? e->tag : make_unknown_mdb_tag(mdb, dst, vlan); + } + return e; + } +} + +/* Expires 'e' from the 'mdb' hash table. */ +void +igmp_mdb_expire(struct igmp_mdb *mdb, struct mdb_entry *e) +{ + hmap_remove(&mdb->table, &e->hmap_node); + list_remove(&e->lru_node); + free(e); +} + +/* Expires all the mdb entries in 'mdb'. If not NULL, the tags in 'mdb' + * are added to 'tags'. Otherwise the tags in 'mdb' are discarded. The client + * is responsible for revalidating any flows that depend on 'mdb', if + * necessary. */ +void +igmp_mdb_flush(struct igmp_mdb *mdb, struct tag_set *tags) +{ + struct mdb_entry *e; + while (get_lru(mdb, &e)){ + if (tags) { + tag_set_add(tags, e->tag); + } + igmp_mdb_expire(mdb, e); + } + hmap_shrink(&mdb->table); +} + +void +igmp_mdb_delete(struct igmp_mdb *mdb, ovs_be32 group, int vlan) +{ + struct mdb_entry *mdb_entry; + + mdb_entry = mdb_entry_lookup(mdb, group, vlan); + if (mdb_entry) + igmp_mdb_expire(mdb, mdb_entry); +} + +void +igmp_snooping_run(struct igmp_mdb *mdb, struct tag_set *set) +{ + struct mdb_entry *e; + while (get_lru(mdb, &e) + && (hmap_count(&mdb->table) > mdb->max_entries + || time_now() >= e->expires)) { + COVERAGE_INC(igmp_snooping_expired); + if (set) { + tag_set_add(set, e->tag); + } + igmp_mdb_expire(mdb, e); + } +} + +void +igmp_snooping_wait(struct igmp_mdb *mdb) +{ + if (hmap_count(&mdb->table) > mdb->max_entries) { + poll_immediate_wake(); + } else if (!list_is_empty(&mdb->lrus)) { + struct mdb_entry *e = mdb_entry_from_lru_node(mdb->lrus.next); + poll_timer_wait_until(e->expires * 1000LL); + } +} diff --git 
a/lib/igmp-snooping.h b/lib/igmp-snooping.h new file mode 100644 index 0000000..d1f8fbb --- /dev/null +++ b/lib/igmp-snooping.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2013 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef IGMP_SNOOPING_H +#define IGMP_SNOOPING_H 1 + +#include <time.h> +#include "hmap.h" +#include "list.h" +#include "packets.h" +#include "tag.h" +#include "timeval.h" + +struct igmp_mdb; + +/* Default maximum size of a mdb table, in entries. */ +#define MDB_DEFAULT_MAX 2048 + +/* Time, in seconds, before expiring a mdb_entry due to inactivity. */ +#define MDB_ENTRY_DEFAULT_IDLE_TIME 300 + +/* Multicast group entry. */ +struct mdb_entry { + struct hmap_node hmap_node; /* Node in a mdb hmap. */ + struct list lru_node; /* Element in 'lrus' list. */ + time_t expires; /* Expiration time. */ + ovs_be32 group; /* Known multicast group. */ + uint16_t vlan; /* VLAN tag. */ + tag_type tag; /* Tag for this entry. */ + + /* Learned port. */ + union { + void *p; + int i; + } port; +}; + +int mdb_entry_age(const struct igmp_mdb *, const struct mdb_entry *); + +/* Returns true if igmp_mdb_insert() just created 'mdb' and the caller has + * not yet properly initialized it. */ +static inline bool mdb_entry_is_new(const struct mdb_entry *mdb) +{ + return !mdb->tag; +} + +/* IGMP snooping table. */ +struct igmp_mdb { + struct hmap table; /* Learning table. 
*/ + struct list lrus; /* In-use entries, least recently used at the + front, most recently used at the back. */ + uint32_t secret; /* Secret for randomizing hash table. */ + unsigned long *flood_vlans; /* Bitmap of learning disabled VLANs. */ + unsigned int idle_time; /* Max age before deleting an entry. */ + size_t max_entries; /* Max number of mdb entries. */ +}; + +/* Basics. */ +struct igmp_mdb *igmp_snooping_create(unsigned int idle_time); +void igmp_snooping_destroy(struct igmp_mdb *); + +void igmp_snooping_run(struct igmp_mdb *, struct tag_set *); +void igmp_snooping_wait(struct igmp_mdb *); + +/* Configuration. */ +bool igmp_mdb_set_flood_vlans(struct igmp_mdb *, + const unsigned long *bitmap); +void igmp_mdb_set_idle_time(struct igmp_mdb *, unsigned int idle_time); +void igmp_mdb_set_max_entries(struct igmp_mdb *, size_t max_entries); + +/* Learning. */ +bool igmp_may_snoop(const struct igmp_mdb *mdb, const ovs_be32 dst, + uint16_t vlan); + +struct mdb_entry *igmp_mdb_insert(struct igmp_mdb *mdb, ovs_be32 grp, + uint16_t vlan); +void igmp_mdb_delete(struct igmp_mdb *mdb, ovs_be32 group, int vlan); + +tag_type igmp_mdb_changed(struct igmp_mdb *mdb, struct mdb_entry *e); + +struct mdb_entry * +igmp_mdb_lookup(const struct igmp_mdb *mdb, ovs_be32 dst, + uint16_t vlan, tag_type *tag); + +/* Flushing. */ +void igmp_mdb_expire(struct igmp_mdb *mdb, struct mdb_entry *e); +void igmp_mdb_flush(struct igmp_mdb *mdb, struct tag_set *tags); + +#endif /* igmp-snooping.h */ diff --git a/lib/learning-switch.c b/lib/learning-switch.c index 4a95dc1..20c7e77 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -28,6 +28,7 @@ #include "flow.h" #include "hmap.h" #include "mac-learning.h" +#include "igmp-snooping.h" #include "ofpbuf.h" #include "ofp-actions.h" #include "ofp-errors.h" @@ -70,6 +71,7 @@ struct lswitch { enum ofputil_protocol protocol; unsigned long long int datapath_id; struct mac_learning *ml; /* NULL to act as hub instead of switch. 
*/ + struct igmp_mdb *mdb; struct flow_wildcards wc; /* Wildcards to apply to flows. */ bool action_normal; /* Use OFPP_NORMAL? */ @@ -254,6 +256,10 @@ lswitch_run(struct lswitch *sw) mac_learning_run(sw->ml, NULL); } + if (sw->mdb) { + igmp_snooping_run(sw->mdb, NULL); + } + rconn_run(sw->rconn); if (sw->state == S_CONNECTING) { @@ -285,6 +291,11 @@ lswitch_wait(struct lswitch *sw) if (sw->ml) { mac_learning_wait(sw->ml); } + + if (sw->mdb) { + igmp_snooping_wait(sw->mdb); + } + rconn_run_wait(sw->rconn); rconn_recv_wait(sw->rconn); } diff --git a/lib/packets.h b/lib/packets.h index 0f97fe6..5c4995a 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -453,6 +453,21 @@ struct icmp_header { }; BUILD_ASSERT_DECL(ICMP_HEADER_LEN == sizeof(struct icmp_header)); +#define IGMP_HEADER_LEN 8 +struct igmp_header { + uint8_t igmp_type; + uint8_t igmp_code; + ovs_be16 igmp_csum; + ovs_be32 group; +}; +BUILD_ASSERT_DECL(IGMP_HEADER_LEN == sizeof(struct igmp_header)); + +#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* From RFC1112 */ +#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 /* Ditto */ +#define IGMPV2_HOST_MEMBERSHIP_REPORT 0x16 /* V2 version of 0x12 */ +#define IGMP_HOST_LEAVE_MESSAGE 0x17 +#define IGMPV3_HOST_MEMBERSHIP_REPORT 0x22 /* V3 version of 0x12 */ + #define UDP_HEADER_LEN 8 struct udp_header { ovs_be16 udp_src; diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 7035530..ec44e41 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -33,6 +33,7 @@ #include "lacp.h" #include "learn.h" #include "mac-learning.h" +#include "igmp-snooping.h" #include "meta-flow.h" #include "multipath.h" #include "netdev-vport.h" @@ -656,6 +657,7 @@ struct ofproto_dpif { struct dpif_sflow *sflow; struct hmap bundles; /* Contains "struct ofbundle"s. 
*/ struct mac_learning *ml; + struct igmp_mdb *mdb; struct ofmirror *mirrors[MAX_MIRRORS]; bool has_mirrors; bool has_bonded_bundles; @@ -1209,6 +1211,7 @@ construct(struct ofproto *ofproto_) ofproto->stp = NULL; hmap_init(&ofproto->bundles); ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME); + ofproto->mdb = igmp_snooping_create(MDB_ENTRY_DEFAULT_IDLE_TIME); for (i = 0; i < MAX_MIRRORS; i++) { ofproto->mirrors[i] = NULL; } @@ -1502,6 +1505,7 @@ wait(struct ofproto *ofproto_) netflow_wait(ofproto->netflow); } mac_learning_wait(ofproto->ml); + igmp_snooping_wait(ofproto->mdb); stp_wait(ofproto); if (ofproto->backer->need_revalidate) { /* Shouldn't happen, but if it does just go around again. */ @@ -2794,6 +2798,8 @@ mirror_set(struct ofproto *ofproto_, void *aux, ofproto->has_mirrors = true; mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); + igmp_mdb_flush(ofproto->mdb, + &ofproto->backer->revalidate_set); mirror_update_dups(ofproto); return 0; @@ -2814,6 +2820,7 @@ mirror_destroy(struct ofmirror *mirror) ofproto = mirror->ofproto; ofproto->backer->need_revalidate = REV_RECONFIGURE; mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); + igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set); mirror_bit = MIRROR_MASK_C(1) << mirror->idx; HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { @@ -2866,6 +2873,11 @@ set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans) if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) { mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set); } + + if (igmp_mdb_set_flood_vlans(ofproto->mdb, flood_vlans)) { + igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set); + } + return 0; } @@ -2892,6 +2904,15 @@ set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time, mac_learning_set_idle_time(ofproto->ml, idle_time); mac_learning_set_max_entries(ofproto->ml, max_entries); } + +static void +set_mdb_config(struct ofproto *ofproto_, 
unsigned int idle_time, + size_t max_entries) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + igmp_mdb_set_idle_time(ofproto->mdb, idle_time); + igmp_mdb_set_max_entries(ofproto->mdb, max_entries); +} /* Ports. */ @@ -7011,6 +7032,44 @@ update_learning_table(struct ofproto_dpif *ofproto, } } +static void +update_mdb_table(struct ofproto_dpif *ofproto, + const struct flow *flow, int vlan, + struct ofbundle *in_bundle) +{ + struct mdb_entry *mdb_entry; + + /* Don't learn the OFPP_NONE port. */ + if (in_bundle == &ofpp_none_bundle) { + return; + } + + if (!igmp_may_snoop(ofproto->mdb, flow->nw_dst, vlan)) { + return; + } + + if (ntohs(flow->tp_src) == IGMP_HOST_MEMBERSHIP_REPORT || + ntohs(flow->tp_src) == IGMPV2_HOST_MEMBERSHIP_REPORT) { + mdb_entry = igmp_mdb_insert(ofproto->mdb, flow->igmp_group, vlan); + + if (mdb_entry_is_new(mdb_entry) || mdb_entry->port.p != in_bundle) { + /* The log messages here could actually be useful in debugging, + * so keep the rate limit relatively high. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); + VLOG_DBG_RL(&rl, "bridge %s: learned that "IP_FMT" is " + "on port %s in VLAN %d", + ofproto->up.name, IP_ARGS(flow->igmp_group), + in_bundle->name, vlan); + + mdb_entry->port.p = in_bundle; + tag_set_add(&ofproto->backer->revalidate_set, + igmp_mdb_changed(ofproto->mdb, mdb_entry)); + } + } else if (ntohs(flow->tp_src) == IGMP_HOST_LEAVE_MESSAGE) { + igmp_mdb_delete(ofproto->mdb, flow->igmp_group, vlan); + } +} + static struct ofbundle * lookup_input_bundle(const struct ofproto_dpif *ofproto, uint16_t in_port, bool warn, struct ofport_dpif **in_ofportp) @@ -7117,6 +7176,7 @@ xlate_normal(struct action_xlate_ctx *ctx) struct ofport_dpif *in_port; struct ofbundle *in_bundle; struct mac_entry *mac; + struct mdb_entry *mdb_entry; uint16_t vlan; uint16_t vid; @@ -7169,32 +7229,62 @@ xlate_normal(struct action_xlate_ctx *ctx) /* Learn source MAC. 
*/ if (ctx->may_learn) { - update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle); - } - - /* Determine output bundle. */ - mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan, - &ctx->tags); - if (mac) { - if (mac->port.p != in_bundle) { - xlate_report(ctx, "forwarding to learned port"); - output_normal(ctx, mac->port.p, vlan); + if (eth_addr_is_multicast(ctx->flow.dl_dst) && + (ctx->flow.nw_proto == IPPROTO_IGMP)) + update_mdb_table(ctx->ofproto, &ctx->flow, vlan, in_bundle); + else + update_learning_table(ctx->ofproto, &ctx->flow, vlan, in_bundle); + } + + if (eth_addr_is_multicast(ctx->flow.dl_dst)) { + mdb_entry = igmp_mdb_lookup(ctx->ofproto->mdb, ctx->flow.nw_dst, vlan, + &ctx->tags); + if (mdb_entry) { + if (mdb_entry->port.p != in_bundle) { + xlate_report(ctx, "forwarding to learned port"); + output_normal(ctx, mdb_entry->port.p, vlan); + } else { + xlate_report(ctx, "learned port is input port, dropping"); + } } else { - xlate_report(ctx, "learned port is input port, dropping"); + struct ofbundle *bundle; + + xlate_report(ctx, "no learned multicast group for destination, flooding"); + HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) { + if (bundle != in_bundle + && ofbundle_includes_vlan(bundle, vlan) + && bundle->floodable + && !bundle->mirror_out) { + output_normal(ctx, bundle, vlan); + } + } + ctx->nf_output_iface = NF_OUT_FLOOD; } } else { - struct ofbundle *bundle; - - xlate_report(ctx, "no learned MAC for destination, flooding"); - HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) { - if (bundle != in_bundle - && ofbundle_includes_vlan(bundle, vlan) - && bundle->floodable - && !bundle->mirror_out) { - output_normal(ctx, bundle, vlan); + /* Determine output bundle. 
*/ + mac = mac_learning_lookup(ctx->ofproto->ml, ctx->flow.dl_dst, vlan, + &ctx->tags); + if (mac) { + if (mac->port.p != in_bundle) { + xlate_report(ctx, "forwarding to learned port"); + output_normal(ctx, mac->port.p, vlan); + } else { + xlate_report(ctx, "learned port is input port, dropping"); } + } else { + struct ofbundle *bundle; + + xlate_report(ctx, "no learned MAC for destination, flooding"); + HMAP_FOR_EACH (bundle, hmap_node, &ctx->ofproto->bundles) { + if (bundle != in_bundle + && ofbundle_includes_vlan(bundle, vlan) + && bundle->floodable + && !bundle->mirror_out) { + output_normal(ctx, bundle, vlan); + } + } + ctx->nf_output_iface = NF_OUT_FLOOD; } - ctx->nf_output_iface = NF_OUT_FLOOD; } } @@ -7465,7 +7555,7 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, { struct ds ds = DS_EMPTY_INITIALIZER; const struct ofproto_dpif *ofproto; - const struct mac_entry *e; + const struct mdb_entry *e; ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { @@ -7474,12 +7564,60 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, } ds_put_cstr(&ds, " port VLAN MAC Age\n"); - LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { + LIST_FOR_EACH (e, lru_node, &ofproto->mdb->lrus) { + struct ofbundle *bundle = e->port.p; + ds_put_format(&ds, "%5d %4d "IP_FMT" %3d\n", + ofbundle_get_a_port(bundle)->odp_port, + e->vlan, IP_ARGS(e->group), + mdb_entry_age(ofproto->mdb, e)); + } + unixctl_command_reply(conn, ds_cstr(&ds)); + ds_destroy(&ds); +} + +static void +ofproto_unixctl_mdb_flush(struct unixctl_conn *conn, int argc, + const char *argv[], void *aux OVS_UNUSED) +{ + struct ofproto_dpif *ofproto; + + if (argc > 1) { + ofproto = ofproto_dpif_lookup(argv[1]); + if (!ofproto) { + unixctl_command_reply_error(conn, "no such bridge"); + return; + } + igmp_mdb_flush(ofproto->mdb, &ofproto->backer->revalidate_set); + } else { + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + igmp_mdb_flush(ofproto->mdb, 
&ofproto->backer->revalidate_set); + } + } + + unixctl_command_reply(conn, "table successfully flushed"); +} + +static void +ofproto_unixctl_mdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[], void *aux OVS_UNUSED) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct ofproto_dpif *ofproto; + const struct mdb_entry *e; + + ofproto = ofproto_dpif_lookup(argv[1]); + if (!ofproto) { + unixctl_command_reply_error(conn, "no such bridge"); + return; + } + + ds_put_cstr(&ds, " port VLAN GROUP Age\n"); + LIST_FOR_EACH (e, lru_node, &ofproto->mdb->lrus) { struct ofbundle *bundle = e->port.p; - ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n", + ds_put_format(&ds, "%5d %4d "IP_FMT" %3d\n", ofbundle_get_a_port(bundle)->odp_port, - e->vlan, ETH_ADDR_ARGS(e->mac), - mac_entry_age(ofproto->ml, e)); + e->vlan, IP_ARGS(e->group), + mdb_entry_age(ofproto->mdb, e)); } unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); @@ -8085,6 +8223,10 @@ ofproto_dpif_unixctl_init(void) ofproto_unixctl_fdb_flush, NULL); unixctl_command_register("fdb/show", "bridge", 1, 1, ofproto_unixctl_fdb_show, NULL); + unixctl_command_register("mdb/flush", "[bridge]", 0, 1, + ofproto_unixctl_mdb_flush, NULL); + unixctl_command_register("mdb/show", "bridge", 1, 1, + ofproto_unixctl_mdb_show, NULL); unixctl_command_register("ofproto/clog", "", 0, 0, ofproto_dpif_clog, NULL); unixctl_command_register("ofproto/unclog", "", 0, 0, @@ -8388,4 +8530,5 @@ const struct ofproto_class ofproto_dpif_class = { forward_bpdu_changed, set_mac_table_config, set_realdev, + set_mdb_config, }; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 95bda33..c2f9f23 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -1314,6 +1314,8 @@ struct ofproto_class { * it. 
*/ int (*set_realdev)(struct ofport *ofport, uint16_t realdev_ofp_port, int vid); + void (*set_mdb_config)(struct ofproto *ofproto, + unsigned int idle_time, size_t max_entries); }; extern const struct ofproto_class ofproto_dpif_class; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index a9c7e76..477df42 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -599,6 +599,16 @@ ofproto_set_mac_table_config(struct ofproto *ofproto, unsigned idle_time, } void +ofproto_set_mdb_config(struct ofproto *ofproto, unsigned idle_time, + size_t max_entries) +{ + if (ofproto->ofproto_class->set_mdb_config) { + ofproto->ofproto_class->set_mdb_config(ofproto, idle_time, + max_entries); + } +} + +void ofproto_set_dp_desc(struct ofproto *p, const char *dp_desc) { free(p->dp_desc); diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 3a66d1b..643b1b7 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -225,6 +225,8 @@ void ofproto_set_flow_eviction_threshold(struct ofproto *, unsigned threshold); void ofproto_set_forward_bpdu(struct ofproto *, bool forward_bpdu); void ofproto_set_mac_table_config(struct ofproto *, unsigned idle_time, size_t max_entries); +void ofproto_set_mdb_config(struct ofproto *, unsigned idle_time, + size_t max_entries); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); int ofproto_set_snoops(struct ofproto *, const struct sset *snoops); int ofproto_set_netflow(struct ofproto *, diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index ab0ecd6..817e47f 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -32,6 +32,7 @@ #include "lacp.h" #include "list.h" #include "mac-learning.h" +#include "igmp-snooping.h" #include "meta-flow.h" #include "netdev.h" #include "ofp-print.h" @@ -1510,6 +1511,7 @@ bridge_configure_mac_table(struct bridge *br) : MAC_DEFAULT_MAX); ofproto_set_mac_table_config(br->ofproto, idle_time, mac_table_size); + ofproto_set_mdb_config(br->ofproto, idle_time, mac_table_size); } static void -- 1.7.7.6 
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev